From f617029db3f8786d94c64f1a73c42b89b6d261fa Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 8 May 2015 00:15:22 -0400 Subject: [PATCH 001/834] nv50: keep track of PGRAPH state in nv50_screen Normally this is kept in nv50_context, and on switching the active context, the state is copied from the previous context. However when the last context is destroyed, this is lost, and a new context might later be created. When the currently-active context is destroyed, save its state in the screen, and restore it when setting the current context. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90363 Reported-by: Matteo Bruni Signed-off-by: Ilia Mirkin Tested-by: Matteo Bruni Cc: mesa-stable@lists.freedesktop.org --- .../drivers/nouveau/nv50/nv50_context.c | 11 +++++-- .../drivers/nouveau/nv50/nv50_context.h | 29 +------------------ .../drivers/nouveau/nv50/nv50_screen.h | 24 +++++++++++++++ .../nouveau/nv50/nv50_state_validate.c | 2 ++ 4 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index 2cfd5db5ea0..5b5d3912c20 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -138,8 +138,11 @@ nv50_destroy(struct pipe_context *pipe) { struct nv50_context *nv50 = nv50_context(pipe); - if (nv50_context_screen(nv50)->cur_ctx == nv50) - nv50_context_screen(nv50)->cur_ctx = NULL; + if (nv50->screen->cur_ctx == nv50) { + nv50->screen->cur_ctx = NULL; + /* Save off the state in case another context gets created */ + nv50->screen->save_state = nv50->state; + } nouveau_pushbuf_bufctx(nv50->base.pushbuf, NULL); nouveau_pushbuf_kick(nv50->base.pushbuf, nv50->base.pushbuf->channel); @@ -290,6 +293,10 @@ nv50_create(struct pipe_screen *pscreen, void *priv) pipe->get_sample_position = nv50_context_get_sample_position; if (!screen->cur_ctx) { + /* Restore the last context's state here, normally handled during + 
* context switch + */ + nv50->state = screen->save_state; screen->cur_ctx = nv50; nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx); } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 45eb554eb4f..1f123ef7e92 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -104,28 +104,7 @@ struct nv50_context { uint32_t dirty; boolean cb_dirty; - struct { - uint32_t instance_elts; /* bitmask of per-instance elements */ - uint32_t instance_base; - uint32_t interpolant_ctrl; - uint32_t semantic_color; - uint32_t semantic_psize; - int32_t index_bias; - boolean uniform_buffer_bound[3]; - boolean prim_restart; - boolean point_sprite; - boolean rt_serialize; - boolean flushed; - boolean rasterizer_discard; - uint8_t tls_required; - boolean new_tls_space; - uint8_t num_vtxbufs; - uint8_t num_vtxelts; - uint8_t num_textures[3]; - uint8_t num_samplers[3]; - uint8_t prim_size; - uint16_t scissor; - } state; + struct nv50_graph_state state; struct nv50_blend_stateobj *blend; struct nv50_rasterizer_stateobj *rast; @@ -191,12 +170,6 @@ nv50_context(struct pipe_context *pipe) return (struct nv50_context *)pipe; } -static INLINE struct nv50_screen * -nv50_context_screen(struct nv50_context *nv50) -{ - return nv50_screen(&nv50->base.screen->base); -} - /* return index used in nv50_context arrays for a specific shader type */ static INLINE unsigned nv50_context_shader_stage(unsigned pipe) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index f8ce365135a..881051b1862 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -25,10 +25,34 @@ struct nv50_context; struct nv50_blitter; +struct nv50_graph_state { + uint32_t instance_elts; /* bitmask of per-instance elements */ + uint32_t instance_base; + uint32_t interpolant_ctrl; + uint32_t 
semantic_color; + uint32_t semantic_psize; + int32_t index_bias; + boolean uniform_buffer_bound[3]; + boolean prim_restart; + boolean point_sprite; + boolean rt_serialize; + boolean flushed; + boolean rasterizer_discard; + uint8_t tls_required; + boolean new_tls_space; + uint8_t num_vtxbufs; + uint8_t num_vtxelts; + uint8_t num_textures[3]; + uint8_t num_samplers[3]; + uint8_t prim_size; + uint16_t scissor; +}; + struct nv50_screen { struct nouveau_screen base; struct nv50_context *cur_ctx; + struct nv50_graph_state save_state; struct nouveau_bo *code; struct nouveau_bo *uniforms; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index 85e19b4c623..116bf4bba7c 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -394,6 +394,8 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to) if (ctx_from) ctx_to->state = ctx_from->state; + else + ctx_to->state = ctx_to->screen->save_state; ctx_to->dirty = ~0; ctx_to->viewports_dirty = ~0; From e9b1ea29bf1e8f09e83bd6358d0d2068053f09d4 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 8 May 2015 00:26:24 -0400 Subject: [PATCH 002/834] nvc0: keep track of PGRAPH state in nvc0_screen See identical commit for nv50. Destroying the current context and then creating a new one or switching to another existing context would cause the "current" state to not be properly initialized, so we save it off in the screen. 
Signed-off-by: Ilia Mirkin Cc: mesa-stable@lists.freedesktop.org --- .../drivers/nouveau/nvc0/nvc0_context.c | 7 +++++- .../drivers/nouveau/nvc0/nvc0_context.h | 24 +----------------- .../drivers/nouveau/nvc0/nvc0_screen.h | 25 +++++++++++++++++++ .../nouveau/nvc0/nvc0_state_validate.c | 2 ++ 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 7662fb50f61..7904984f503 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -139,8 +139,12 @@ nvc0_destroy(struct pipe_context *pipe) { struct nvc0_context *nvc0 = nvc0_context(pipe); - if (nvc0->screen->cur_ctx == nvc0) + if (nvc0->screen->cur_ctx == nvc0) { nvc0->screen->cur_ctx = NULL; + nvc0->screen->save_state = nvc0->state; + nvc0->screen->save_state.tfb = NULL; + } + /* Unset bufctx, we don't want to revalidate any resources after the flush. * Other contexts will always set their bufctx again on action calls. 
*/ @@ -303,6 +307,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) pipe->get_sample_position = nvc0_context_get_sample_position; if (!screen->cur_ctx) { + nvc0->state = screen->save_state; screen->cur_ctx = nvc0; nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index ef251f35a1b..a8d7593b398 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -113,29 +113,7 @@ struct nvc0_context { uint32_t dirty; uint32_t dirty_cp; /* dirty flags for compute state */ - struct { - boolean flushed; - boolean rasterizer_discard; - boolean early_z_forced; - boolean prim_restart; - uint32_t instance_elts; /* bitmask of per-instance elements */ - uint32_t instance_base; - uint32_t constant_vbos; - uint32_t constant_elts; - int32_t index_bias; - uint16_t scissor; - uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */ - uint8_t num_vtxbufs; - uint8_t num_vtxelts; - uint8_t num_textures[6]; - uint8_t num_samplers[6]; - uint8_t tls_required; /* bitmask of shader types using l[] */ - uint8_t c14_bound; /* whether immediate array constbuf is bound */ - uint8_t clip_enable; - uint32_t clip_mode; - uint32_t uniform_buffer_bound[5]; - struct nvc0_transform_feedback_state *tfb; - } state; + struct nvc0_graph_state state; struct nvc0_blend_stateobj *blend; struct nvc0_rasterizer_stateobj *rast; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 1a7d5027a7c..ef2bd43f006 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -27,10 +27,35 @@ struct nvc0_context; struct nvc0_blitter; +struct nvc0_graph_state { + boolean flushed; + boolean rasterizer_discard; + boolean early_z_forced; + boolean prim_restart; + uint32_t instance_elts; /* bitmask of per-instance 
elements */ + uint32_t instance_base; + uint32_t constant_vbos; + uint32_t constant_elts; + int32_t index_bias; + uint16_t scissor; + uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */ + uint8_t num_vtxbufs; + uint8_t num_vtxelts; + uint8_t num_textures[6]; + uint8_t num_samplers[6]; + uint8_t tls_required; /* bitmask of shader types using l[] */ + uint8_t c14_bound; /* whether immediate array constbuf is bound */ + uint8_t clip_enable; + uint32_t clip_mode; + uint32_t uniform_buffer_bound[5]; + struct nvc0_transform_feedback_state *tfb; +}; + struct nvc0_screen { struct nouveau_screen base; struct nvc0_context *cur_ctx; + struct nvc0_graph_state save_state; int num_occlusion_queries_active; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 6051f128f66..d3ad81d2d66 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -543,6 +543,8 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to) if (ctx_from) ctx_to->state = ctx_from->state; + else + ctx_to->state = ctx_to->screen->save_state; ctx_to->dirty = ~0; ctx_to->viewports_dirty = ~0; From 7892210400e8f3bd14697c0a3dd56e98454a45df Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 9 May 2015 13:25:51 -0400 Subject: [PATCH 003/834] nvc0: reset the instanced elements state when doing blit using 3d engine Since we update num_vtxelts here, we could otherwise end up with stale instancing information in the upper bits which wouldn't otherwise get reset. (Also we run the risk of the previous draw having set the first element as instanced.) This appears as one of the causes for the test pointed out in fdo#90363 to fail on nvc0. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90363 Signed-off-by: Ilia Mirkin Cc: mesa-stable@lists.freedesktop.org --- src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 4404d8c1a74..a820de7259a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -1152,6 +1152,12 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST); } + if (nvc0->state.instance_elts) { + nvc0->state.instance_elts = 0; + BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2); + PUSH_DATA (push, n); + PUSH_DATA (push, 0); + } nvc0->state.num_vtxelts = 2; for (i = 0; i < info->dst.box.depth; ++i, z += dz) { From da136dc07ddb6147d181c96f475b94f6281efd73 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 9 May 2015 03:26:07 -0400 Subject: [PATCH 004/834] nv50/ir: only enable mul saturate on G200+ Commit 44673512a84 enabled support for saturating fmul. However experimentally this does not seem to work on the older chips. Restrict the feature to G200 (NVA0) and later. 
Reported-by: Pierre Moreau Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90350 Signed-off-by: Ilia Mirkin Tested-by: Pierre Moreau Reviewed-by: Tobias Klausmann Cc: mesa-stable@lists.freedesktop.org --- src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index 178a1671c3f..a742162ad3c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -84,7 +84,7 @@ static const struct opProperties _initProps[] = // neg abs not sat c[] s[], a[], imm { OP_ADD, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 }, { OP_SUB, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 }, - { OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 }, + { OP_MUL, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 }, { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, { OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x1, 0x1, 0x0 }, // special constraint @@ -188,6 +188,9 @@ void TargetNV50::initOpInfo() if (prop->mSat & 8) opInfo[prop->op].dstMods = NV50_IR_MOD_SAT; } + + if (chipset >= 0xa0) + opInfo[OP_MUL].dstMods = NV50_IR_MOD_SAT; } unsigned int From 1cbdafc47a46fa55fcd5afa9193525e694099944 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 10 May 2015 06:03:49 -0400 Subject: [PATCH 005/834] freedreno/ir3/nir: fix build break after f752effa Our lower if/else pass was missed when converting NIR to use linked lists rather than hashsets to track use/def sets. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c index ae36019ed5f..dc9e4626f27 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c @@ -74,14 +74,13 @@ valid_dest(nir_block *block, nir_dest *dest) * (so this is run iteratively in a loop). Therefore if * we get this far, it should not have any if_uses: */ - assert(dest->ssa.if_uses->entries == 0); + assert(list_empty(&dest->ssa.if_uses)); /* The only uses of this definition must be phi's in the * successor or in the current block */ - struct set_entry *entry; - set_foreach(dest->ssa.uses, entry) { - const nir_instr *dest_instr = entry->key; + nir_foreach_use(&dest->ssa, use) { + nir_instr *dest_instr = use->parent_instr; if (dest_instr->block == block) continue; if ((dest_instr->type == nir_instr_type_phi) && From 9ab90c058fdb86e9364af258fca7c4de23adbe50 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 11 May 2015 06:24:03 +1000 Subject: [PATCH 006/834] r600: use pipe->hw prim convert from radeonsi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids future addition to PIPE_PRIM_ from causing regressions on r600g. 
Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_state_common.c | 31 ++++++++++---------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index c50c7055851..13dc9ee8c10 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -95,22 +95,23 @@ static void r600_texture_barrier(struct pipe_context *ctx) static unsigned r600_conv_pipe_prim(unsigned prim) { static const unsigned prim_conv[] = { - V_008958_DI_PT_POINTLIST, - V_008958_DI_PT_LINELIST, - V_008958_DI_PT_LINELOOP, - V_008958_DI_PT_LINESTRIP, - V_008958_DI_PT_TRILIST, - V_008958_DI_PT_TRISTRIP, - V_008958_DI_PT_TRIFAN, - V_008958_DI_PT_QUADLIST, - V_008958_DI_PT_QUADSTRIP, - V_008958_DI_PT_POLYGON, - V_008958_DI_PT_LINELIST_ADJ, - V_008958_DI_PT_LINESTRIP_ADJ, - V_008958_DI_PT_TRILIST_ADJ, - V_008958_DI_PT_TRISTRIP_ADJ, - V_008958_DI_PT_RECTLIST + [PIPE_PRIM_POINTS] = V_008958_DI_PT_POINTLIST, + [PIPE_PRIM_LINES] = V_008958_DI_PT_LINELIST, + [PIPE_PRIM_LINE_LOOP] = V_008958_DI_PT_LINELOOP, + [PIPE_PRIM_LINE_STRIP] = V_008958_DI_PT_LINESTRIP, + [PIPE_PRIM_TRIANGLES] = V_008958_DI_PT_TRILIST, + [PIPE_PRIM_TRIANGLE_STRIP] = V_008958_DI_PT_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = V_008958_DI_PT_TRIFAN, + [PIPE_PRIM_QUADS] = V_008958_DI_PT_QUADLIST, + [PIPE_PRIM_QUAD_STRIP] = V_008958_DI_PT_QUADSTRIP, + [PIPE_PRIM_POLYGON] = V_008958_DI_PT_POLYGON, + [PIPE_PRIM_LINES_ADJACENCY] = V_008958_DI_PT_LINELIST_ADJ, + [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_008958_DI_PT_LINESTRIP_ADJ, + [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_008958_DI_PT_TRILIST_ADJ, + [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_008958_DI_PT_TRISTRIP_ADJ, + [R600_PRIM_RECTANGLE_LIST] = V_008958_DI_PT_RECTLIST }; + assert(prim < Elements(prim_conv)); return prim_conv[prim]; } From 4a8cd2799c2467b9916dd0ba672f05a394aa9b9f Mon Sep 17 00:00:00 2001 From: Marta Lofstedt Date: 
Thu, 7 May 2015 17:13:47 +0200 Subject: [PATCH 007/834] main: glGetIntegeri_v fails for GL_VERTEX_BINDING_STRIDE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The return type for GL_VERTEX_BINDING_STRIDE is missing, this cause glGetIntegeri_v to fail. Signed-off-by: Marta Lofstedt Reviewed-by: Tapani Pälli Reviewed-by: Emil Velikov Cc: "10.4 10.5" --- src/mesa/main/get.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index a881bc589ba..09be715f911 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -1911,6 +1911,7 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v) if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs) goto invalid_value; v->value_int = ctx->Array.VAO->VertexBinding[VERT_ATTRIB_GENERIC(index)].Stride; + return TYPE_INT; /* ARB_shader_image_load_store */ case GL_IMAGE_BINDING_NAME: { From abf3fefa1aa734844e0ca8e95e8c3a501909aa33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Thu, 7 May 2015 08:07:30 +0300 Subject: [PATCH 008/834] mesa: use _mesa_has_compute_shaders instead of extension check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was really the original purpose, for enabling the path for ES3.1 tests without the extension being set. Set also fallthrough comment for Coverity (caught by Matt). v2: .. 
and test the right way, not wrong one (Ilia Mirkin) Signed-off-by: Tapani Pälli Reviewed-by: Ilia Mirkin --- src/mesa/main/shader_query.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 6e46553724b..3445f89a356 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -28,6 +28,7 @@ * \author Ian Romanick */ +#include "main/context.h" #include "main/core.h" #include "glsl_symbol_table.h" #include "ir.h" @@ -986,8 +987,9 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, case GL_ACTIVE_VARIABLES: return get_buffer_property(shProg, res, prop, val, caller); case GL_REFERENCED_BY_COMPUTE_SHADER: - if (!ctx->Extensions.ARB_compute_shader) + if (!_mesa_has_compute_shaders(ctx)) goto invalid_enum; + /* fallthrough */ case GL_REFERENCED_BY_VERTEX_SHADER: case GL_REFERENCED_BY_GEOMETRY_SHADER: case GL_REFERENCED_BY_FRAGMENT_SHADER: From bfdae9149e00bd5c2521db3e75669ae043eed5cc Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Fri, 8 May 2015 17:35:18 +0100 Subject: [PATCH 009/834] i965/fs: Disable opt_sampler_eot for textureGather The opt_sampler_eot optimisation seems to break when the last instruction is SHADER_OPCODE_TG4. A bunch of Piglit tests end up doing this so it causes a lot of regressions. I can't find any documentation or known workarounds to indicate that this is expected behaviour, but considering that this is probably a pretty unlikely situation in a real use case we might as well disable it in order to avoid the regressions. In total this fixes 451 tests. 
Reviewed-by: Ben Widawsky Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_fs.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b2701b89689..3414d92efde 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2658,6 +2658,16 @@ fs_visitor::opt_sampler_eot() if (unlikely(tex_inst->is_head_sentinel()) || !tex_inst->is_tex()) return false; + /* This optimisation doesn't seem to work for textureGather for some + * reason. I can't find any documentation or known workarounds to indicate + * that this is expected, but considering that it is probably pretty + * unlikely that a shader would directly write out the results from + * textureGather we might as well just disable it. + */ + if (tex_inst->opcode == SHADER_OPCODE_TG4 || + tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET) + return false; + /* If there's no header present, we need to munge the LOAD_PAYLOAD as well. * It's very likely to be the previous instruction. */ From 6aaf09b93b668a24b557e05195b9897e8cee8559 Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Fri, 1 May 2015 00:03:32 +0200 Subject: [PATCH 010/834] egl/wayland: properly destroy wayland objects the wl_registry and the wl_queue allocated weren't destroyed. 
CC: 10.5 Reviewed-by: Dave Airlie Reviewed-by: Daniel Stone Signed-off-by: Axel Davy --- src/egl/drivers/dri2/egl_dri2.c | 2 ++ src/egl/drivers/dri2/platform_wayland.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index f4c29da61cf..169abcc4c63 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -730,6 +730,8 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp) #ifdef HAVE_WAYLAND_PLATFORM case _EGL_PLATFORM_WAYLAND: wl_drm_destroy(dri2_dpy->wl_drm); + wl_registry_destroy(dri2_dpy->wl_registry); + wl_event_queue_destroy(dri2_dpy->wl_queue); if (dri2_dpy->own_device) { wl_display_disconnect(dri2_dpy->wl_dpy); } diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index e2260053917..a5bcf25af41 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -1029,7 +1029,7 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) wl_registry_add_listener(dri2_dpy->wl_registry, &registry_listener, dri2_dpy); if (roundtrip(dri2_dpy) < 0 || dri2_dpy->wl_drm == NULL) - goto cleanup_dpy; + goto cleanup_registry; if (roundtrip(dri2_dpy) < 0 || dri2_dpy->fd == -1) goto cleanup_drm; @@ -1112,6 +1112,9 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) cleanup_drm: free(dri2_dpy->device_name); wl_drm_destroy(dri2_dpy->wl_drm); + cleanup_registry: + wl_registry_destroy(dri2_dpy->wl_registry); + wl_event_queue_destroy(dri2_dpy->wl_queue); cleanup_dpy: free(dri2_dpy); From 05ac39ac497ad7835cac7a161491282b5f69d711 Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Fri, 1 May 2015 00:06:31 +0200 Subject: [PATCH 011/834] doc/egl: Remove depreciated EGL_SOFTWARE EGL_SOFTWARE is not supported anywhere in the code, whereas LIBGL_ALWAYS_SOFTWARE is.
Reviewed-by: Dave Airlie Reviewed-by: Daniel Stone Signed-off-by: Axel Davy --- docs/egl.html | 8 -------- 1 file changed, 8 deletions(-) diff --git a/docs/egl.html b/docs/egl.html index d946bb0ae38..3ab1a6018fd 100644 --- a/docs/egl.html +++ b/docs/egl.html @@ -183,14 +183,6 @@ probably required only for some of the demos found in mesa/demo repository.

values are: debug, info, warning, and fatal.

- - -
EGL_SOFTWARE
-
- -

For drivers that support both hardware and software rendering, setting this -variable to true forces the use of software rendering.

-
From c4ff6d00cd7dde4646ff96733f68d3ddbf540c2c Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Fri, 1 May 2015 00:20:34 +0200 Subject: [PATCH 012/834] glx/dri3: Add additional check for gpu offloading case Checks blitImage is implemented. Initially having the __DRIimageExtension extension at version 9 at least meant blitImage was supported. However some implementation do advertise version >= 9 without implementing it. CC: 10.5 Reviewed-by: Dave Airlie Reviewed-by: Daniel Stone Signed-off-by: Axel Davy --- src/glx/dri3_glx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c index ff77a91b15f..dfb0093395f 100644 --- a/src/glx/dri3_glx.c +++ b/src/glx/dri3_glx.c @@ -1985,6 +1985,11 @@ dri3_create_screen(int screen, struct glx_display * priv) goto handle_error; } + if (psc->is_different_gpu && !psc->image->blitImage) { + ErrorMessageF("Different GPU, but blitImage not implemented for this driver\n"); + goto handle_error; + } + if (!psc->is_different_gpu && ( !psc->texBuffer || psc->texBuffer->base.version < 2 || !psc->texBuffer->setTexBuffer2 From fb0960a14bd6980aa63deef45ec3cf1ab99bcf0a Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Fri, 1 May 2015 01:30:10 +0200 Subject: [PATCH 013/834] egl/wayland: Add support for render-nodes It is possible the server advertises a render-node. In that case no authentication is needed, and Gem names are forbidden. Reviewed-by: Dave Airlie Reviewed-by: Daniel Stone Signed-off-by: Axel Davy v2: do not check for __DRI_IMAGE_DRIVER, but instead do not advertise __DRI_DRI2_LOADER when on a render-node. 
--- src/egl/drivers/dri2/egl_dri2.h | 1 + src/egl/drivers/dri2/platform_wayland.c | 67 ++++++++++++++++++++----- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 371fb4aee4a..3ee3f806db8 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -200,6 +200,7 @@ struct dri2_egl_display int authenticated; int formats; uint32_t capabilities; + int is_render_node; #endif }; diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index a5bcf25af41..bdb19c2855f 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -800,12 +800,33 @@ bad_format: return NULL; } +static char +is_fd_render_node(int fd) +{ + struct stat render; + + if (fstat(fd, &render)) + return 0; + + if (!S_ISCHR(render.st_mode)) + return 0; + + if (render.st_rdev & 0x80) + return 1; + return 0; +} + static int dri2_wl_authenticate(_EGLDisplay *disp, uint32_t id) { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); int ret = 0; + if (dri2_dpy->is_render_node) { + _eglLog(_EGL_WARNING, "wayland-egl: client asks server to " + "authenticate for render-nodes"); + return 0; + } dri2_dpy->authenticated = 0; wl_drm_authenticate(dri2_dpy->wl_drm, id); @@ -847,8 +868,13 @@ drm_handle_device(void *data, struct wl_drm *drm, const char *device) return; } - drmGetMagic(dri2_dpy->fd, &magic); - wl_drm_authenticate(dri2_dpy->wl_drm, magic); + if (is_fd_render_node(dri2_dpy->fd)) { + dri2_dpy->is_render_node = 1; + dri2_dpy->authenticated = 1; + } else { + drmGetMagic(dri2_dpy->fd, &magic); + wl_drm_authenticate(dri2_dpy->wl_drm, magic); + } } static void @@ -1046,18 +1072,23 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) if (!dri2_load_driver(disp)) goto cleanup_driver_name; - dri2_dpy->dri2_loader_extension.base.name = __DRI_DRI2_LOADER; - dri2_dpy->dri2_loader_extension.base.version = 3; - 
dri2_dpy->dri2_loader_extension.getBuffers = dri2_wl_get_buffers; - dri2_dpy->dri2_loader_extension.flushFrontBuffer = dri2_wl_flush_front_buffer; - dri2_dpy->dri2_loader_extension.getBuffersWithFormat = - dri2_wl_get_buffers_with_format; + dri2_dpy->extensions[0] = &image_loader_extension.base; + dri2_dpy->extensions[1] = &image_lookup_extension.base; + dri2_dpy->extensions[2] = &use_invalidate.base; - dri2_dpy->extensions[0] = &dri2_dpy->dri2_loader_extension.base; - dri2_dpy->extensions[1] = &image_loader_extension.base; - dri2_dpy->extensions[2] = &image_lookup_extension.base; - dri2_dpy->extensions[3] = &use_invalidate.base; - dri2_dpy->extensions[4] = NULL; + /* render nodes cannot use Gem names, and thus do not support + * the __DRI_DRI2_LOADER extension */ + if (!dri2_dpy->is_render_node) { + dri2_dpy->dri2_loader_extension.base.name = __DRI_DRI2_LOADER; + dri2_dpy->dri2_loader_extension.base.version = 3; + dri2_dpy->dri2_loader_extension.getBuffers = dri2_wl_get_buffers; + dri2_dpy->dri2_loader_extension.flushFrontBuffer = dri2_wl_flush_front_buffer; + dri2_dpy->dri2_loader_extension.getBuffersWithFormat = + dri2_wl_get_buffers_with_format; + dri2_dpy->extensions[3] = &dri2_dpy->dri2_loader_extension.base; + dri2_dpy->extensions[4] = NULL; + } else + dri2_dpy->extensions[3] = NULL; dri2_dpy->swap_available = EGL_TRUE; @@ -1075,6 +1106,14 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) dri2_dpy->image->createImageFromFds == NULL) dri2_dpy->capabilities &= ~WL_DRM_CAPABILITY_PRIME; + /* We cannot use Gem names with render-nodes, only prime fds (dma-buf). 
+ * The server needs to accept them */ + if (dri2_dpy->is_render_node && + !(dri2_dpy->capabilities & WL_DRM_CAPABILITY_PRIME)) { + _eglLog(_EGL_WARNING, "wayland-egl: display is not render-node capable"); + goto cleanup_screen; + } + types = EGL_WINDOW_BIT; for (i = 0; dri2_dpy->driver_configs[i]; i++) { config = dri2_dpy->driver_configs[i]; @@ -1103,6 +1142,8 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) return EGL_TRUE; + cleanup_screen: + dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen); cleanup_driver: dlclose(dri2_dpy->driver); cleanup_driver_name: From 4cd546df82c557b9a765e40db2f96c4faa299846 Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Fri, 1 May 2015 01:16:24 +0200 Subject: [PATCH 014/834] egl/wayland: Implement DRI_PRIME support When the server gpu and requested gpu are different: . They likely don't support the same tiling modes . They likely do not have fast access to the same locations Thus we do: . render to a tiled buffer we do not share with the server . Copy the content at every swap to a buffer with no tiling that we share with the server. This is similar to the glx dri3 DRI_PRIME implementation. Reviewed-by: Dave Airlie Reviewed-by: Daniel Stone Signed-off-by: Axel Davy --- src/egl/drivers/dri2/egl_dri2.h | 3 + src/egl/drivers/dri2/platform_wayland.c | 104 ++++++++++++++++++++---- 2 files changed, 92 insertions(+), 15 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 3ee3f806db8..cabeb2dfdc6 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -201,6 +201,7 @@ struct dri2_egl_display int formats; uint32_t capabilities; int is_render_node; + int is_different_gpu; #endif }; @@ -254,6 +255,8 @@ struct dri2_egl_surface #ifdef HAVE_WAYLAND_PLATFORM struct wl_buffer *wl_buffer; __DRIimage *dri_image; + /* for is_different_gpu case. 
NULL else */ + __DRIimage *linear_copy; #endif #ifdef HAVE_DRM_PLATFORM struct gbm_bo *bo; diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index bdb19c2855f..b111c3a8332 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -240,6 +240,8 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) wl_buffer_destroy(dri2_surf->color_buffers[i].wl_buffer); if (dri2_surf->color_buffers[i].dri_image) dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].dri_image); + if (dri2_surf->color_buffers[i].linear_copy) + dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].linear_copy); } for (i = 0; i < __DRI_BUFFER_COUNT; i++) @@ -274,9 +276,12 @@ dri2_wl_release_buffers(struct dri2_egl_surface *dri2_surf) wl_buffer_destroy(dri2_surf->color_buffers[i].wl_buffer); if (dri2_surf->color_buffers[i].dri_image) dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].dri_image); + if (dri2_surf->color_buffers[i].linear_copy) + dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].linear_copy); dri2_surf->color_buffers[i].wl_buffer = NULL; dri2_surf->color_buffers[i].dri_image = NULL; + dri2_surf->color_buffers[i].linear_copy = NULL; dri2_surf->color_buffers[i].locked = 0; } @@ -338,13 +343,29 @@ get_back_bo(struct dri2_egl_surface *dri2_surf) if (dri2_surf->back == NULL) return -1; + + if (dri2_dpy->is_different_gpu && + dri2_surf->back->linear_copy == NULL) { + dri2_surf->back->linear_copy = + dri2_dpy->image->createImage(dri2_dpy->dri_screen, + dri2_surf->base.Width, + dri2_surf->base.Height, + dri_image_format, + __DRI_IMAGE_USE_SHARE | + __DRI_IMAGE_USE_LINEAR, + NULL); + if (dri2_surf->back->linear_copy == NULL) + return -1; + } + if (dri2_surf->back->dri_image == NULL) { dri2_surf->back->dri_image = dri2_dpy->image->createImage(dri2_dpy->dri_screen, dri2_surf->base.Width, dri2_surf->base.Height, dri_image_format, - __DRI_IMAGE_USE_SHARE, + 
dri2_dpy->is_different_gpu ? + 0 : __DRI_IMAGE_USE_SHARE, NULL); dri2_surf->back->age = 0; } @@ -432,8 +453,11 @@ update_buffers(struct dri2_egl_surface *dri2_surf) dri2_surf->color_buffers[i].wl_buffer) { wl_buffer_destroy(dri2_surf->color_buffers[i].wl_buffer); dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].dri_image); + if (dri2_dpy->is_different_gpu) + dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].linear_copy); dri2_surf->color_buffers[i].wl_buffer = NULL; dri2_surf->color_buffers[i].dri_image = NULL; + dri2_surf->color_buffers[i].linear_copy = NULL; } } @@ -578,16 +602,20 @@ create_wl_buffer(struct dri2_egl_surface *dri2_surf) { struct dri2_egl_display *dri2_dpy = dri2_egl_display(dri2_surf->base.Resource.Display); + __DRIimage *image; int fd, stride, name; if (dri2_surf->current->wl_buffer != NULL) return; + if (dri2_dpy->is_different_gpu) { + image = dri2_surf->current->linear_copy; + } else { + image = dri2_surf->current->dri_image; + } if (dri2_dpy->capabilities & WL_DRM_CAPABILITY_PRIME) { - dri2_dpy->image->queryImage(dri2_surf->current->dri_image, - __DRI_IMAGE_ATTRIB_FD, &fd); - dri2_dpy->image->queryImage(dri2_surf->current->dri_image, - __DRI_IMAGE_ATTRIB_STRIDE, &stride); + dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_FD, &fd); + dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride); dri2_surf->current->wl_buffer = wl_drm_create_prime_buffer(dri2_dpy->wl_drm, @@ -600,10 +628,8 @@ create_wl_buffer(struct dri2_egl_surface *dri2_surf) 0, 0); close(fd); } else { - dri2_dpy->image->queryImage(dri2_surf->current->dri_image, - __DRI_IMAGE_ATTRIB_NAME, &name); - dri2_dpy->image->queryImage(dri2_surf->current->dri_image, - __DRI_IMAGE_ATTRIB_STRIDE, &stride); + dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_NAME, &name); + dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride); dri2_surf->current->wl_buffer = wl_drm_create_buffer(dri2_dpy->wl_drm, @@ -683,6 +709,18 @@ 
dri2_wl_swap_buffers_with_damage(_EGLDriver *drv, } } + if (dri2_dpy->is_different_gpu) { + _EGLContext *ctx = _eglGetCurrentContext(); + struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); + dri2_dpy->image->blitImage(dri2_ctx->dri_context, + dri2_surf->current->linear_copy, + dri2_surf->current->dri_image, + 0, 0, dri2_surf->base.Width, + dri2_surf->base.Height, + 0, 0, dri2_surf->base.Width, + dri2_surf->base.Height, 0); + } + dri2_flush_drawable_for_swapbuffers(disp, draw); (*dri2_dpy->flush->invalidate)(dri2_surf->dri_drawable); @@ -869,7 +907,6 @@ drm_handle_device(void *data, struct wl_drm *drm, const char *device) } if (is_fd_render_node(dri2_dpy->fd)) { - dri2_dpy->is_render_node = 1; dri2_dpy->authenticated = 1; } else { drmGetMagic(dri2_dpy->fd, &magic); @@ -1063,6 +1100,24 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) if (roundtrip(dri2_dpy) < 0 || !dri2_dpy->authenticated) goto cleanup_fd; + dri2_dpy->fd = loader_get_user_preferred_fd(dri2_dpy->fd, + &dri2_dpy->is_different_gpu); + if (dri2_dpy->is_different_gpu) { + free(dri2_dpy->device_name); + dri2_dpy->device_name = loader_get_device_name_for_fd(dri2_dpy->fd); + if (!dri2_dpy->device_name) { + _eglError(EGL_BAD_ALLOC, "wayland-egl: failed to get device name " + "for requested GPU"); + goto cleanup_fd; + } + } + + /* we have to do the check now, because loader_get_user_preferred_fd + * will return a render-node when the requested gpu is different + * to the server, but also if the client asks for the same gpu than + * the server by requesting its pci-id */ + dri2_dpy->is_render_node = is_fd_render_node(dri2_dpy->fd); + dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0); if (dri2_dpy->driver_name == NULL) { _eglError(EGL_BAD_ALLOC, "DRI2: failed to get driver name"); @@ -1097,10 +1152,10 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) dri2_wl_setup_swap_interval(dri2_dpy); - /* The server shouldn't advertise WL_DRM_CAPABILITY_PRIME if the driver 
- * doesn't have createImageFromFds, since we're using the same driver on - * both sides. We don't want crash if that happens anyway, so fall back to - * gem names if we don't have prime support. */ + /* To use Prime, we must have _DRI_IMAGE v7 at least. + * createImageFromFds support indicates that Prime export/import + * is supported by the driver. Fall back to + * gem names if we don't have Prime support. */ if (dri2_dpy->image->base.version < 7 || dri2_dpy->image->createImageFromFds == NULL) @@ -1114,6 +1169,16 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) goto cleanup_screen; } + if (dri2_dpy->is_different_gpu && + (dri2_dpy->image->base.version < 9 || + dri2_dpy->image->blitImage == NULL)) { + _eglLog(_EGL_WARNING, "wayland-egl: Different GPU selected, but the " + "Image extension in the driver is not " + "compatible. Version 9 or later and blitImage() " + "are required"); + goto cleanup_screen; + } + types = EGL_WINDOW_BIT; for (i = 0; dri2_dpy->driver_configs[i]; i++) { config = dri2_dpy->driver_configs[i]; @@ -1126,7 +1191,16 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) } disp->Extensions.WL_bind_wayland_display = EGL_TRUE; - disp->Extensions.WL_create_wayland_buffer_from_image = EGL_TRUE; + /* We cannot convert EGLImage to wl_buffer when on a different gpu, + * because the buffer of the EGLImage has likely a tiling mode the server + * gpu won't support. There is no way to check for now.
Thus do not support the + * extension */ + if (!dri2_dpy->is_different_gpu) { + disp->Extensions.WL_create_wayland_buffer_from_image = EGL_TRUE; + } else { + dri2_wl_display_vtbl.create_wayland_buffer_from_image = + dri2_fallback_create_wayland_buffer_from_image; + } disp->Extensions.EXT_buffer_age = EGL_TRUE; disp->Extensions.EXT_swap_buffers_with_damage = EGL_TRUE; From f1cc478d89986c87f01fdaae510335965e19493c Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Fri, 1 May 2015 11:12:46 +0200 Subject: [PATCH 015/834] egl/x11: move dri2_x11_swrast_create_image_khr to egl_dri2_fallback.h Reviewed-by: Dave Airlie Reviewed-by: Daniel Stone Signed-off-by: Axel Davy --- src/egl/drivers/dri2/egl_dri2_fallbacks.h | 9 +++++++++ src/egl/drivers/dri2/platform_x11.c | 11 +---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2_fallbacks.h b/src/egl/drivers/dri2/egl_dri2_fallbacks.h index 9cba0010ba7..e769af36e60 100644 --- a/src/egl/drivers/dri2/egl_dri2_fallbacks.h +++ b/src/egl/drivers/dri2/egl_dri2_fallbacks.h @@ -45,6 +45,15 @@ dri2_fallback_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *disp, return NULL; } +static inline _EGLImage* +dri2_fallback_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp, + _EGLContext *ctx, EGLenum target, + EGLClientBuffer buffer, + const EGLint *attr_list) +{ + return NULL; +} + static inline EGLBoolean dri2_fallback_swap_interval(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, EGLint interval) diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index ddb3b54e843..300072d6d92 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -1017,15 +1017,6 @@ dri2_x11_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp, } } -static _EGLImage* -dri2_x11_swrast_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp, - _EGLContext *ctx, EGLenum target, - EGLClientBuffer buffer, - const EGLint *attr_list) -{ - return NULL; -} - 
static EGLBoolean dri2_x11_get_sync_values(_EGLDisplay *display, _EGLSurface *surface, EGLuint64KHR *ust, EGLuint64KHR *msc, @@ -1058,7 +1049,7 @@ static struct dri2_egl_display_vtbl dri2_x11_swrast_display_vtbl = { .create_pixmap_surface = dri2_x11_create_pixmap_surface, .create_pbuffer_surface = dri2_x11_create_pbuffer_surface, .destroy_surface = dri2_x11_destroy_surface, - .create_image = dri2_x11_swrast_create_image_khr, + .create_image = dri2_fallback_create_image_khr, .swap_interval = dri2_fallback_swap_interval, .swap_buffers = dri2_x11_swap_buffers, .swap_buffers_region = dri2_fallback_swap_buffers_region, From cd25e52f6bb5279cd7b1992e5907df3966b900ce Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Fri, 1 May 2015 11:16:41 +0200 Subject: [PATCH 016/834] egl/wayland: Simplify dri2_wl_create_surface This function is always used with EGL_WINDOW_BIT. Pixmaps are forbidden for Wayland, and PBuffers are unimplemented. Reviewed-by: Daniel Stone . Signed-off-by: Axel Davy --- src/egl/drivers/dri2/platform_wayland.c | 38 +++++++++---------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index b111c3a8332..e9f142c64d1 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -120,7 +120,7 @@ resize_callback(struct wl_egl_window *wl_win, void *data) * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface(). 
*/ static _EGLSurface * -dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, +dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, void *native_window, const EGLint *attrib_list) { @@ -137,7 +137,7 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, return NULL; } - if (!_eglInitSurface(&dri2_surf->base, disp, type, conf, attrib_list)) + if (!_eglInitSurface(&dri2_surf->base, disp, EGL_WINDOW_BIT, conf, attrib_list)) goto cleanup_surf; if (conf->RedSize == 5) @@ -147,25 +147,17 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, else dri2_surf->format = WL_DRM_FORMAT_ARGB8888; - switch (type) { - case EGL_WINDOW_BIT: - dri2_surf->wl_win = window; + dri2_surf->wl_win = window; - dri2_surf->wl_win->private = dri2_surf; - dri2_surf->wl_win->resize_callback = resize_callback; + dri2_surf->wl_win->private = dri2_surf; + dri2_surf->wl_win->resize_callback = resize_callback; - dri2_surf->base.Width = -1; - dri2_surf->base.Height = -1; - break; - default: - goto cleanup_surf; - } + dri2_surf->base.Width = -1; + dri2_surf->base.Height = -1; dri2_surf->dri_drawable = (*dri2_dpy->dri2->createNewDrawable) (dri2_dpy->dri_screen, - type == EGL_WINDOW_BIT ? 
- dri2_conf->dri_double_config : - dri2_conf->dri_single_config, + dri2_conf->dri_double_config, dri2_surf); if (dri2_surf->dri_drawable == NULL) { _eglError(EGL_BAD_ALLOC, "dri2->createNewDrawable"); @@ -193,8 +185,7 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); _EGLSurface *surf; - surf = dri2_wl_create_surface(drv, disp, EGL_WINDOW_BIT, conf, - native_window, attrib_list); + surf = dri2_wl_create_surface(drv, disp, conf, native_window, attrib_list); if (surf != NULL) dri2_wl_swap_interval(drv, disp, surf, dri2_dpy->default_swap_interval); @@ -253,10 +244,8 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) if (dri2_surf->throttle_callback) wl_callback_destroy(dri2_surf->throttle_callback); - if (dri2_surf->base.Type == EGL_WINDOW_BIT) { - dri2_surf->wl_win->private = NULL; - dri2_surf->wl_win->resize_callback = NULL; - } + dri2_surf->wl_win->private = NULL; + dri2_surf->wl_win->resize_callback = NULL; free(surf); @@ -428,9 +417,8 @@ update_buffers(struct dri2_egl_surface *dri2_surf) dri2_egl_display(dri2_surf->base.Resource.Display); int i; - if (dri2_surf->base.Type == EGL_WINDOW_BIT && - (dri2_surf->base.Width != dri2_surf->wl_win->width || - dri2_surf->base.Height != dri2_surf->wl_win->height)) { + if (dri2_surf->base.Width != dri2_surf->wl_win->width || + dri2_surf->base.Height != dri2_surf->wl_win->height) { dri2_wl_release_buffers(dri2_surf); From cdcfe48fb0431184fabb40aa5a244d086f551df5 Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Fri, 1 May 2015 11:11:20 +0200 Subject: [PATCH 017/834] egl/wayland: Implement swrast support Reviewed-by: Dave Airlie Reviewed-by: Daniel Stone . 
Signed-off-by: Axel Davy --- src/egl/drivers/dri2/egl_dri2.c | 5 +- src/egl/drivers/dri2/egl_dri2.h | 4 + src/egl/drivers/dri2/platform_wayland.c | 704 +++++++++++++++++++++++- 3 files changed, 696 insertions(+), 17 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 169abcc4c63..467b81c6e8d 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -729,7 +729,10 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp) #endif #ifdef HAVE_WAYLAND_PLATFORM case _EGL_PLATFORM_WAYLAND: - wl_drm_destroy(dri2_dpy->wl_drm); + if (dri2_dpy->wl_drm) + wl_drm_destroy(dri2_dpy->wl_drm); + if (dri2_dpy->wl_shm) + wl_shm_destroy(dri2_dpy->wl_shm); wl_registry_destroy(dri2_dpy->wl_registry); wl_event_queue_destroy(dri2_dpy->wl_queue); if (dri2_dpy->own_device) { diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index cabeb2dfdc6..adade3db9cc 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -196,6 +196,7 @@ struct dri2_egl_display struct wl_registry *wl_registry; struct wl_drm *wl_server_drm; struct wl_drm *wl_drm; + struct wl_shm *wl_shm; struct wl_event_queue *wl_queue; int authenticated; int formats; @@ -257,6 +258,9 @@ struct dri2_egl_surface __DRIimage *dri_image; /* for is_different_gpu case. NULL else */ __DRIimage *linear_copy; + /* for swrast */ + void *data; + int data_size; #endif #ifdef HAVE_DRM_PLATFORM struct gbm_bo *bo; diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index e9f142c64d1..9914b687fb8 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -1,5 +1,6 @@ /* * Copyright © 2011-2012 Intel Corporation + * Copyright © 2012 Collabora, Ltd. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -35,6 +36,7 @@ #include #include #include +#include #include "egl_dri2.h" #include "egl_dri2_fallbacks.h" @@ -233,13 +235,18 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].dri_image); if (dri2_surf->color_buffers[i].linear_copy) dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].linear_copy); + if (dri2_surf->color_buffers[i].data) + munmap(dri2_surf->color_buffers[i].data, + dri2_surf->color_buffers[i].data_size); } - for (i = 0; i < __DRI_BUFFER_COUNT; i++) - if (dri2_surf->dri_buffers[i] && - dri2_surf->dri_buffers[i]->attachment != __DRI_BUFFER_BACK_LEFT) - dri2_dpy->dri2->releaseBuffer(dri2_dpy->dri_screen, - dri2_surf->dri_buffers[i]); + if (dri2_dpy->dri2) { + for (i = 0; i < __DRI_BUFFER_COUNT; i++) + if (dri2_surf->dri_buffers[i] && + dri2_surf->dri_buffers[i]->attachment != __DRI_BUFFER_BACK_LEFT) + dri2_dpy->dri2->releaseBuffer(dri2_dpy->dri_screen, + dri2_surf->dri_buffers[i]); + } if (dri2_surf->throttle_callback) wl_callback_destroy(dri2_surf->throttle_callback); @@ -267,18 +274,24 @@ dri2_wl_release_buffers(struct dri2_egl_surface *dri2_surf) dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].dri_image); if (dri2_surf->color_buffers[i].linear_copy) dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].linear_copy); + if (dri2_surf->color_buffers[i].data) + munmap(dri2_surf->color_buffers[i].data, + dri2_surf->color_buffers[i].data_size); dri2_surf->color_buffers[i].wl_buffer = NULL; dri2_surf->color_buffers[i].dri_image = NULL; dri2_surf->color_buffers[i].linear_copy = NULL; + dri2_surf->color_buffers[i].data = NULL; dri2_surf->color_buffers[i].locked = 0; } - for (i = 0; i < __DRI_BUFFER_COUNT; i++) - if (dri2_surf->dri_buffers[i] && - dri2_surf->dri_buffers[i]->attachment != 
__DRI_BUFFER_BACK_LEFT) - dri2_dpy->dri2->releaseBuffer(dri2_dpy->dri_screen, - dri2_surf->dri_buffers[i]); + if (dri2_dpy->dri2) { + for (i = 0; i < __DRI_BUFFER_COUNT; i++) + if (dri2_surf->dri_buffers[i] && + dri2_surf->dri_buffers[i]->attachment != __DRI_BUFFER_BACK_LEFT) + dri2_dpy->dri2->releaseBuffer(dri2_dpy->dri_screen, + dri2_surf->dri_buffers[i]); + } } static int @@ -944,7 +957,7 @@ static const struct wl_drm_listener drm_listener = { }; static void -registry_handle_global(void *data, struct wl_registry *registry, uint32_t name, +registry_handle_global_drm(void *data, struct wl_registry *registry, uint32_t name, const char *interface, uint32_t version) { struct dri2_egl_display *dri2_dpy = data; @@ -964,8 +977,8 @@ registry_handle_global_remove(void *data, struct wl_registry *registry, { } -static const struct wl_registry_listener registry_listener = { - registry_handle_global, +static const struct wl_registry_listener registry_listener_drm = { + registry_handle_global_drm, registry_handle_global_remove }; @@ -1041,8 +1054,8 @@ static struct dri2_egl_display_vtbl dri2_wl_display_vtbl = { .get_sync_values = dri2_fallback_get_sync_values, }; -EGLBoolean -dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) +static EGLBoolean +dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp) { struct dri2_egl_display *dri2_dpy; const __DRIconfig *config; @@ -1078,7 +1091,7 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) wl_proxy_set_queue((struct wl_proxy *) dri2_dpy->wl_registry, dri2_dpy->wl_queue); wl_registry_add_listener(dri2_dpy->wl_registry, - &registry_listener, dri2_dpy); + &registry_listener_drm, dri2_dpy); if (roundtrip(dri2_dpy) < 0 || dri2_dpy->wl_drm == NULL) goto cleanup_registry; @@ -1223,3 +1236,662 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) return EGL_FALSE; } + +static int +dri2_wl_swrast_get_stride_for_format(int format, int w) +{ + if (format == WL_SHM_FORMAT_RGB565) + return 2 * w; + else /* ARGB8888
|| XRGB8888 */ + return 4 * w; +} + +/* + * Taken from weston shared/os-compatibility.c + */ + +static int +set_cloexec_or_close(int fd) +{ + long flags; + + if (fd == -1) + return -1; + + flags = fcntl(fd, F_GETFD); + if (flags == -1) + goto err; + + if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) == -1) + goto err; + + return fd; + +err: + close(fd); + return -1; +} + +/* + * Taken from weston shared/os-compatibility.c + */ + +static int +create_tmpfile_cloexec(char *tmpname) +{ + int fd; + +#ifdef HAVE_MKOSTEMP + fd = mkostemp(tmpname, O_CLOEXEC); + if (fd >= 0) + unlink(tmpname); +#else + fd = mkstemp(tmpname); + if (fd >= 0) { + fd = set_cloexec_or_close(fd); + unlink(tmpname); + } +#endif + + return fd; +} + +/* + * Taken from weston shared/os-compatibility.c + * + * Create a new, unique, anonymous file of the given size, and + * return the file descriptor for it. The file descriptor is set + * CLOEXEC. The file is immediately suitable for mmap()'ing + * the given size at offset zero. + * + * The file should not have a permanent backing store like a disk, + * but may have if XDG_RUNTIME_DIR is not properly implemented in OS. + * + * The file name is deleted from the file system. + * + * The file is suitable for buffer sharing between processes by + * transmitting the file descriptor over Unix sockets using the + * SCM_RIGHTS methods. + * + * If the C library implements posix_fallocate(), it is used to + * guarantee that disk space is available for the file at the + * given size. If disk space is insufficient, errno is set to ENOSPC. + * If posix_fallocate() is not supported, program may receive + * SIGBUS on accessing mmap()'ed file contents instead.
+ */ +static int +os_create_anonymous_file(off_t size) +{ + static const char template[] = "/mesa-shared-XXXXXX"; + const char *path; + char *name; + int fd; + int ret; + + path = getenv("XDG_RUNTIME_DIR"); + if (!path) { + errno = ENOENT; + return -1; + } + + name = malloc(strlen(path) + sizeof(template)); + if (!name) + return -1; + + strcpy(name, path); + strcat(name, template); + + fd = create_tmpfile_cloexec(name); + + free(name); + + if (fd < 0) + return -1; + + ret = ftruncate(fd, size); + if (ret < 0) { + close(fd); + return -1; + } + + return fd; +} + + +static EGLBoolean +dri2_wl_swrast_allocate_buffer(struct dri2_egl_display *dri2_dpy, + int format, int w, int h, + void **data, int *size, + struct wl_buffer **buffer) +{ + struct wl_shm_pool *pool; + int fd, stride, size_map; + void *data_map; + + stride = dri2_wl_swrast_get_stride_for_format(format, w); + size_map = h * stride; + + /* Create a sharable buffer */ + fd = os_create_anonymous_file(size_map); + if (fd < 0) + return EGL_FALSE; + + data_map = mmap(NULL, size_map, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (data_map == MAP_FAILED) { + close(fd); + return EGL_FALSE; + } + + /* Share it in a wl_buffer */ + pool = wl_shm_create_pool(dri2_dpy->wl_shm, fd, size_map); + *buffer = wl_shm_pool_create_buffer(pool, 0, w, h, stride, format); + wl_shm_pool_destroy(pool); + close(fd); + + *data = data_map; + *size = size_map; + return EGL_TRUE; +} + +static int +swrast_update_buffers(struct dri2_egl_surface *dri2_surf) +{ + struct dri2_egl_display *dri2_dpy = + dri2_egl_display(dri2_surf->base.Resource.Display); + int i; + + /* we need to do the following operations only once per frame */ + if (dri2_surf->back) + return 0; + + if (dri2_surf->base.Width != dri2_surf->wl_win->width || + dri2_surf->base.Height != dri2_surf->wl_win->height) { + + dri2_wl_release_buffers(dri2_surf); + + dri2_surf->base.Width = dri2_surf->wl_win->width; + dri2_surf->base.Height = dri2_surf->wl_win->height; + dri2_surf->dx = 
dri2_surf->wl_win->dx; + dri2_surf->dy = dri2_surf->wl_win->dy; + dri2_surf->current = NULL; + } + + /* find back buffer */ + + /* We always want to throttle to some event (either a frame callback or + * a sync request) after the commit so that we can be sure the + * compositor has had a chance to handle it and send us a release event + * before we look for a free buffer */ + while (dri2_surf->throttle_callback != NULL) + if (wl_display_dispatch_queue(dri2_dpy->wl_dpy, + dri2_dpy->wl_queue) == -1) + return -1; + + /* try get free buffer already created */ + for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) { + if (!dri2_surf->color_buffers[i].locked && + dri2_surf->color_buffers[i].wl_buffer) { + dri2_surf->back = &dri2_surf->color_buffers[i]; + break; + } + } + + /* else choose any another free location */ + if (!dri2_surf->back) { + for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) { + if (!dri2_surf->color_buffers[i].locked) { + dri2_surf->back = &dri2_surf->color_buffers[i]; + if (!dri2_wl_swrast_allocate_buffer(dri2_dpy, + dri2_surf->format, + dri2_surf->base.Width, + dri2_surf->base.Height, + &dri2_surf->back->data, + &dri2_surf->back->data_size, + &dri2_surf->back->wl_buffer)) { + _eglError(EGL_BAD_ALLOC, "failed to allocate color buffer"); + return -1; + } + wl_proxy_set_queue((struct wl_proxy *) dri2_surf->back->wl_buffer, + dri2_dpy->wl_queue); + wl_buffer_add_listener(dri2_surf->back->wl_buffer, + &wl_buffer_listener, dri2_surf); + break; + } + } + } + + if (!dri2_surf->back) { + _eglError(EGL_BAD_ALLOC, "failed to find free buffer"); + return -1; + } + + dri2_surf->back->locked = 1; + + /* If we have an extra unlocked buffer at this point, we had to do triple + * buffering for a while, but now can go back to just double buffering. + * That means we can free any unlocked buffer now. 
*/ + for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) { + if (!dri2_surf->color_buffers[i].locked && + dri2_surf->color_buffers[i].wl_buffer) { + wl_buffer_destroy(dri2_surf->color_buffers[i].wl_buffer); + munmap(dri2_surf->color_buffers[i].data, + dri2_surf->color_buffers[i].data_size); + dri2_surf->color_buffers[i].wl_buffer = NULL; + dri2_surf->color_buffers[i].data = NULL; + } + } + + return 0; +} + +static void* +dri2_wl_swrast_get_frontbuffer_data(struct dri2_egl_surface *dri2_surf) +{ + /* if there has been a resize: */ + if (!dri2_surf->current) + return NULL; + + return dri2_surf->current->data; +} + +static void* +dri2_wl_swrast_get_backbuffer_data(struct dri2_egl_surface *dri2_surf) +{ + assert(dri2_surf->back); + return dri2_surf->back->data; +} + +static void +dri2_wl_swrast_commit_backbuffer(struct dri2_egl_surface *dri2_surf) +{ + struct dri2_egl_display *dri2_dpy = dri2_egl_display(dri2_surf->base.Resource.Display); + + if (dri2_surf->base.SwapInterval > 0) { + dri2_surf->throttle_callback = + wl_surface_frame(dri2_surf->wl_win->surface); + wl_callback_add_listener(dri2_surf->throttle_callback, + &throttle_listener, dri2_surf); + wl_proxy_set_queue((struct wl_proxy *) dri2_surf->throttle_callback, + dri2_dpy->wl_queue); + } + + dri2_surf->current = dri2_surf->back; + dri2_surf->back = NULL; + + wl_surface_attach(dri2_surf->wl_win->surface, + dri2_surf->current->wl_buffer, + dri2_surf->dx, dri2_surf->dy); + + dri2_surf->wl_win->attached_width = dri2_surf->base.Width; + dri2_surf->wl_win->attached_height = dri2_surf->base.Height; + /* reset resize growing parameters */ + dri2_surf->dx = 0; + dri2_surf->dy = 0; + + wl_surface_damage(dri2_surf->wl_win->surface, + 0, 0, INT32_MAX, INT32_MAX); + wl_surface_commit(dri2_surf->wl_win->surface); + + /* If we're not waiting for a frame callback then we'll at least throttle + * to a sync callback so that we always give a chance for the compositor to + * handle the commit and send a release event 
before checking for a free + * buffer */ + if (dri2_surf->throttle_callback == NULL) { + dri2_surf->throttle_callback = wl_display_sync(dri2_dpy->wl_dpy); + wl_callback_add_listener(dri2_surf->throttle_callback, + &throttle_listener, dri2_surf); + wl_proxy_set_queue((struct wl_proxy *) dri2_surf->throttle_callback, + dri2_dpy->wl_queue); + } + + wl_display_flush(dri2_dpy->wl_dpy); +} + +static void +dri2_wl_swrast_get_drawable_info(__DRIdrawable * draw, + int *x, int *y, int *w, int *h, + void *loaderPrivate) +{ + struct dri2_egl_surface *dri2_surf = loaderPrivate; + + (void) swrast_update_buffers(dri2_surf); + *x = 0; + *y = 0; + *w = dri2_surf->base.Width; + *h = dri2_surf->base.Height; +} + +static void +dri2_wl_swrast_get_image(__DRIdrawable * read, + int x, int y, int w, int h, + char *data, void *loaderPrivate) +{ + struct dri2_egl_surface *dri2_surf = loaderPrivate; + int copy_width = dri2_wl_swrast_get_stride_for_format(dri2_surf->format, w); + int x_offset = dri2_wl_swrast_get_stride_for_format(dri2_surf->format, x); + int src_stride = dri2_wl_swrast_get_stride_for_format(dri2_surf->format, dri2_surf->base.Width); + int dst_stride = copy_width; + char *src, *dst; + + src = dri2_wl_swrast_get_frontbuffer_data(dri2_surf); + if (!src) { + memset(data, 0, copy_width * h); + return; + } + + assert(data != src); + assert(copy_width <= src_stride); + + src += x_offset; + src += y * src_stride; + dst = data; + + if (copy_width > src_stride-x_offset) + copy_width = src_stride-x_offset; + if (h > dri2_surf->base.Height-y) + h = dri2_surf->base.Height-y; + + for (; h>0; h--) { + memcpy(dst, src, copy_width); + src += src_stride; + dst += dst_stride; + } +} + +static void +dri2_wl_swrast_put_image2(__DRIdrawable * draw, int op, + int x, int y, int w, int h, int stride, + char *data, void *loaderPrivate) +{ + struct dri2_egl_surface *dri2_surf = loaderPrivate; + int copy_width = dri2_wl_swrast_get_stride_for_format(dri2_surf->format, w); + int dst_stride = 
dri2_wl_swrast_get_stride_for_format(dri2_surf->format, dri2_surf->base.Width); + int x_offset = dri2_wl_swrast_get_stride_for_format(dri2_surf->format, x); + char *src, *dst; + + assert(copy_width <= stride); + + (void) swrast_update_buffers(dri2_surf); + dst = dri2_wl_swrast_get_backbuffer_data(dri2_surf); + + /* partial copy, copy old content */ + if (copy_width < dst_stride) + dri2_wl_swrast_get_image(draw, 0, 0, + dri2_surf->base.Width, dri2_surf->base.Height, + dst, loaderPrivate); + + dst += x_offset; + dst += y * dst_stride; + + src = data; + + /* drivers expect we do these checks (and some rely on it) */ + if (copy_width > dst_stride-x_offset) + copy_width = dst_stride-x_offset; + if (h > dri2_surf->base.Height-y) + h = dri2_surf->base.Height-y; + + for (; h>0; h--) { + memcpy(dst, src, copy_width); + src += stride; + dst += dst_stride; + } + dri2_wl_swrast_commit_backbuffer(dri2_surf); +} + +static void +dri2_wl_swrast_put_image(__DRIdrawable * draw, int op, + int x, int y, int w, int h, + char *data, void *loaderPrivate) +{ + struct dri2_egl_surface *dri2_surf = loaderPrivate; + int stride; + + stride = dri2_wl_swrast_get_stride_for_format(dri2_surf->format, w); + dri2_wl_swrast_put_image2(draw, op, x, y, w, h, + stride, data, loaderPrivate); +} + +/** + * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface(). 
+ */ +static _EGLSurface * +dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, + _EGLConfig *conf, void *native_window, + const EGLint *attrib_list) +{ + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + struct dri2_egl_config *dri2_conf = dri2_egl_config(conf); + struct wl_egl_window *window = native_window; + struct dri2_egl_surface *dri2_surf; + + (void) drv; + + dri2_surf = calloc(1, sizeof *dri2_surf); + if (!dri2_surf) { + _eglError(EGL_BAD_ALLOC, "dri2_create_surface"); + return NULL; + } + + if (!_eglInitSurface(&dri2_surf->base, disp, EGL_WINDOW_BIT, conf, attrib_list)) + goto cleanup_surf; + + if (conf->RedSize == 5) + dri2_surf->format = WL_SHM_FORMAT_RGB565; + else if (conf->AlphaSize == 0) + dri2_surf->format = WL_SHM_FORMAT_XRGB8888; + else + dri2_surf->format = WL_SHM_FORMAT_ARGB8888; + + dri2_surf->wl_win = window; + + dri2_surf->base.Width = -1; + dri2_surf->base.Height = -1; + + dri2_surf->dri_drawable = + (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen, + dri2_conf->dri_double_config, + dri2_surf); + if (dri2_surf->dri_drawable == NULL) { + _eglError(EGL_BAD_ALLOC, "swrast->createNewDrawable"); + goto cleanup_dri_drawable; + } + + dri2_wl_swap_interval(drv, disp, &dri2_surf->base, + dri2_dpy->default_swap_interval); + + return &dri2_surf->base; + + cleanup_dri_drawable: + dri2_dpy->core->destroyDrawable(dri2_surf->dri_drawable); + cleanup_surf: + free(dri2_surf); + + return NULL; +} + +static EGLBoolean +dri2_wl_swrast_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) +{ + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw); + + dri2_dpy->core->swapBuffers(dri2_surf->dri_drawable); + return EGL_TRUE; +} + +static void +shm_handle_format(void *data, struct wl_shm *shm, uint32_t format) +{ + struct dri2_egl_display *dri2_dpy = data; + + switch (format) { + case WL_SHM_FORMAT_ARGB8888: + dri2_dpy->formats |= 
HAS_ARGB8888; + break; + case WL_SHM_FORMAT_XRGB8888: + dri2_dpy->formats |= HAS_XRGB8888; + break; + case WL_SHM_FORMAT_RGB565: + dri2_dpy->formats |= HAS_RGB565; + break; + } +} + +static const struct wl_shm_listener shm_listener = { + shm_handle_format +}; + +static void +registry_handle_global_swrast(void *data, struct wl_registry *registry, uint32_t name, + const char *interface, uint32_t version) +{ + struct dri2_egl_display *dri2_dpy = data; + + if (strcmp(interface, "wl_shm") == 0) { + dri2_dpy->wl_shm = + wl_registry_bind(registry, name, &wl_shm_interface, 1); + wl_shm_add_listener(dri2_dpy->wl_shm, &shm_listener, dri2_dpy); + } +} + +static const struct wl_registry_listener registry_listener_swrast = { + registry_handle_global_swrast, + registry_handle_global_remove +}; + +static struct dri2_egl_display_vtbl dri2_wl_swrast_display_vtbl = { + .authenticate = NULL, + .create_window_surface = dri2_wl_swrast_create_window_surface, + .create_pixmap_surface = dri2_wl_create_pixmap_surface, + .create_pbuffer_surface = dri2_fallback_create_pbuffer_surface, + .destroy_surface = dri2_wl_destroy_surface, + .create_image = dri2_fallback_create_image_khr, + .swap_interval = dri2_wl_swap_interval, + .swap_buffers = dri2_wl_swrast_swap_buffers, + .swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage, + .swap_buffers_region = dri2_fallback_swap_buffers_region, + .post_sub_buffer = dri2_fallback_post_sub_buffer, + .copy_buffers = dri2_fallback_copy_buffers, + .query_buffer_age = dri2_fallback_query_buffer_age, + .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image, + .get_sync_values = dri2_fallback_get_sync_values, +}; + +static EGLBoolean +dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp) +{ + struct dri2_egl_display *dri2_dpy; + const __DRIconfig *config; + uint32_t types; + int i; + static const unsigned int argb_masks[4] = + { 0xff0000, 0xff00, 0xff, 0xff000000 }; + static const unsigned int rgb_masks[4] = { 
0xff0000, 0xff00, 0xff, 0 }; + static const unsigned int rgb565_masks[4] = { 0xf800, 0x07e0, 0x001f, 0 }; + + loader_set_logger(_eglLog); + + dri2_dpy = calloc(1, sizeof *dri2_dpy); + if (!dri2_dpy) + return _eglError(EGL_BAD_ALLOC, "eglInitialize"); + + disp->DriverData = (void *) dri2_dpy; + if (disp->PlatformDisplay == NULL) { + dri2_dpy->wl_dpy = wl_display_connect(NULL); + if (dri2_dpy->wl_dpy == NULL) + goto cleanup_dpy; + dri2_dpy->own_device = 1; + } else { + dri2_dpy->wl_dpy = disp->PlatformDisplay; + } + + dri2_dpy->wl_queue = wl_display_create_queue(dri2_dpy->wl_dpy); + + if (dri2_dpy->own_device) + wl_display_dispatch_pending(dri2_dpy->wl_dpy); + + dri2_dpy->wl_registry = wl_display_get_registry(dri2_dpy->wl_dpy); + wl_proxy_set_queue((struct wl_proxy *) dri2_dpy->wl_registry, + dri2_dpy->wl_queue); + wl_registry_add_listener(dri2_dpy->wl_registry, + &registry_listener_swrast, dri2_dpy); + + if (roundtrip(dri2_dpy) < 0 || dri2_dpy->wl_shm == NULL) + goto cleanup_registry; + + if (roundtrip(dri2_dpy) < 0 || dri2_dpy->formats == 0) + goto cleanup_shm; + + dri2_dpy->driver_name = strdup("swrast"); + if (!dri2_load_driver_swrast(disp)) + goto cleanup_shm; + + dri2_dpy->swrast_loader_extension.base.name = __DRI_SWRAST_LOADER; + dri2_dpy->swrast_loader_extension.base.version = 2; + dri2_dpy->swrast_loader_extension.getDrawableInfo = dri2_wl_swrast_get_drawable_info; + dri2_dpy->swrast_loader_extension.putImage = dri2_wl_swrast_put_image; + dri2_dpy->swrast_loader_extension.getImage = dri2_wl_swrast_get_image; + dri2_dpy->swrast_loader_extension.putImage2 = dri2_wl_swrast_put_image2; + + dri2_dpy->extensions[0] = &dri2_dpy->swrast_loader_extension.base; + dri2_dpy->extensions[1] = NULL; + + if (!dri2_create_screen(disp)) + goto cleanup_driver; + + dri2_wl_setup_swap_interval(dri2_dpy); + + types = EGL_WINDOW_BIT; + for (i = 0; dri2_dpy->driver_configs[i]; i++) { + config = dri2_dpy->driver_configs[i]; + if (dri2_dpy->formats & HAS_XRGB8888) + 
dri2_add_config(disp, config, i + 1, types, NULL, rgb_masks); + if (dri2_dpy->formats & HAS_ARGB8888) + dri2_add_config(disp, config, i + 1, types, NULL, argb_masks); + if (dri2_dpy->formats & HAS_RGB565) + dri2_add_config(disp, config, i + 1, types, NULL, rgb565_masks); + } + + /* we're supporting EGL 1.4 */ + disp->VersionMajor = 1; + disp->VersionMinor = 4; + + /* Fill vtbl last to prevent accidentally calling virtual function during + * initialization. + */ + dri2_dpy->vtbl = &dri2_wl_swrast_display_vtbl; + + return EGL_TRUE; + + cleanup_driver: + dlclose(dri2_dpy->driver); + cleanup_shm: + wl_shm_destroy(dri2_dpy->wl_shm); + cleanup_registry: + wl_registry_destroy(dri2_dpy->wl_registry); + wl_event_queue_destroy(dri2_dpy->wl_queue); + cleanup_dpy: + free(dri2_dpy); + + return EGL_FALSE; +} + +EGLBoolean +dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp) +{ + EGLBoolean initialized = EGL_TRUE; + + int hw_accel = (getenv("LIBGL_ALWAYS_SOFTWARE") == NULL); + + if (hw_accel) { + if (!dri2_initialize_wayland_drm(drv, disp)) { + initialized = dri2_initialize_wayland_swrast(drv, disp); + } + } else { + initialized = dri2_initialize_wayland_swrast(drv, disp); + } + + return initialized; + +} From 13fa84e1bcf1e07c69bb678508f8cdb0912b57c5 Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Fri, 1 May 2015 19:08:37 +0200 Subject: [PATCH 018/834] egl/swrast: Enable config extension for swrast Enables to use dri config for swrast, like vblank_mode. 
Reviewed-by: Dave Airlie Signed-off-by: Axel Davy --- src/egl/drivers/dri2/egl_dri2.c | 27 +++++++++++++------------- src/gallium/state_trackers/dri/drisw.c | 1 + src/mesa/drivers/dri/swrast/swrast.c | 1 + 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 467b81c6e8d..fe5cbc8815d 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -576,6 +576,7 @@ dri2_create_screen(_EGLDisplay *disp) { const __DRIextension **extensions; struct dri2_egl_display *dri2_dpy; + unsigned i; dri2_dpy = disp->DriverData; @@ -616,28 +617,26 @@ dri2_create_screen(_EGLDisplay *disp) extensions = dri2_dpy->core->getExtensions(dri2_dpy->dri_screen); if (dri2_dpy->dri2) { - unsigned i; - if (!dri2_bind_extensions(dri2_dpy, dri2_core_extensions, extensions)) goto cleanup_dri_screen; - - for (i = 0; extensions[i]; i++) { - if (strcmp(extensions[i]->name, __DRI2_ROBUSTNESS) == 0) { - dri2_dpy->robustness = (__DRIrobustnessExtension *) extensions[i]; - } - if (strcmp(extensions[i]->name, __DRI2_CONFIG_QUERY) == 0) { - dri2_dpy->config = (__DRI2configQueryExtension *) extensions[i]; - } - if (strcmp(extensions[i]->name, __DRI2_FENCE) == 0) { - dri2_dpy->fence = (__DRI2fenceExtension *) extensions[i]; - } - } } else { assert(dri2_dpy->swrast); if (!dri2_bind_extensions(dri2_dpy, swrast_core_extensions, extensions)) goto cleanup_dri_screen; } + for (i = 0; extensions[i]; i++) { + if (strcmp(extensions[i]->name, __DRI2_ROBUSTNESS) == 0) { + dri2_dpy->robustness = (__DRIrobustnessExtension *) extensions[i]; + } + if (strcmp(extensions[i]->name, __DRI2_CONFIG_QUERY) == 0) { + dri2_dpy->config = (__DRI2configQueryExtension *) extensions[i]; + } + if (strcmp(extensions[i]->name, __DRI2_FENCE) == 0) { + dri2_dpy->fence = (__DRI2fenceExtension *) extensions[i]; + } + } + dri2_setup_screen(disp); return EGL_TRUE; diff --git a/src/gallium/state_trackers/dri/drisw.c 
b/src/gallium/state_trackers/dri/drisw.c index 5f69a2d670e..4a2c1bbc2ee 100644 --- a/src/gallium/state_trackers/dri/drisw.c +++ b/src/gallium/state_trackers/dri/drisw.c @@ -333,6 +333,7 @@ drisw_update_tex_buffer(struct dri_drawable *drawable, static const __DRIextension *drisw_screen_extensions[] = { &driTexBufferExtension.base, &dri2RendererQueryExtension.base, + &dri2ConfigQueryExtension.base, NULL }; diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index 2ddb474dde7..cbc946c3ffd 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -958,6 +958,7 @@ static const __DRIextension *swrast_driver_extensions[] = { &driCoreExtension.base, &driSWRastExtension.base, &driCopySubBufferExtension.base, + &dri2ConfigQueryExtension.base, &swrast_vtable.base, NULL }; From 7a58262e58d8edac3308777def0950032628edee Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Wed, 1 Apr 2015 10:39:45 -0400 Subject: [PATCH 019/834] egl: Remove skeleton implementation of EGL_MESA_screen_surface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No backend wires this up to anything, and the extension spec has been marked obsolete for 4+ years. 
Reviewed-by: Marek Olšák Signed-off-by: Adam Jackson --- include/EGL/eglmesaext.h | 46 ----- src/egl/main/Makefile.sources | 4 - src/egl/main/eglapi.c | 278 -------------------------- src/egl/main/eglapi.h | 31 --- src/egl/main/eglconfig.c | 4 - src/egl/main/eglcurrent.c | 8 - src/egl/main/egldisplay.h | 1 - src/egl/main/eglfallbacks.c | 18 -- src/egl/main/eglmode.c | 357 ---------------------------------- src/egl/main/eglmode.h | 88 --------- src/egl/main/eglscreen.c | 235 ---------------------- src/egl/main/eglscreen.h | 117 ----------- src/egl/main/eglsurface.c | 55 ------ 13 files changed, 1242 deletions(-) delete mode 100644 src/egl/main/eglmode.c delete mode 100644 src/egl/main/eglmode.h delete mode 100644 src/egl/main/eglscreen.c delete mode 100644 src/egl/main/eglscreen.h diff --git a/include/EGL/eglmesaext.h b/include/EGL/eglmesaext.h index 7ce8346c2c6..27cf7ebe7e7 100644 --- a/include/EGL/eglmesaext.h +++ b/include/EGL/eglmesaext.h @@ -34,52 +34,6 @@ extern "C" { #include -/* EGL_MESA_screen extension >>> PRELIMINARY <<< */ -#ifndef EGL_MESA_screen_surface -#define EGL_MESA_screen_surface 1 - -#define EGL_BAD_SCREEN_MESA 0x4000 -#define EGL_BAD_MODE_MESA 0x4001 -#define EGL_SCREEN_COUNT_MESA 0x4002 -#define EGL_SCREEN_POSITION_MESA 0x4003 -#define EGL_SCREEN_POSITION_GRANULARITY_MESA 0x4004 -#define EGL_MODE_ID_MESA 0x4005 -#define EGL_REFRESH_RATE_MESA 0x4006 -#define EGL_OPTIMAL_MESA 0x4007 -#define EGL_INTERLACED_MESA 0x4008 -#define EGL_SCREEN_BIT_MESA 0x08 - -typedef khronos_uint32_t EGLScreenMESA; -typedef khronos_uint32_t EGLModeMESA; - -#ifdef EGL_EGLEXT_PROTOTYPES -EGLAPI EGLBoolean EGLAPIENTRY eglChooseModeMESA(EGLDisplay dpy, EGLScreenMESA screen, const EGLint *attrib_list, EGLModeMESA *modes, EGLint modes_size, EGLint *num_modes); -EGLAPI EGLBoolean EGLAPIENTRY eglGetModesMESA(EGLDisplay dpy, EGLScreenMESA screen, EGLModeMESA *modes, EGLint modes_size, EGLint *num_modes); -EGLAPI EGLBoolean EGLAPIENTRY eglGetModeAttribMESA(EGLDisplay dpy, 
EGLModeMESA mode, EGLint attribute, EGLint *value); -EGLAPI EGLBoolean EGLAPIENTRY eglGetScreensMESA(EGLDisplay dpy, EGLScreenMESA *screens, EGLint max_screens, EGLint *num_screens); -EGLAPI EGLSurface EGLAPIENTRY eglCreateScreenSurfaceMESA(EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list); -EGLAPI EGLBoolean EGLAPIENTRY eglShowScreenSurfaceMESA(EGLDisplay dpy, EGLint screen, EGLSurface surface, EGLModeMESA mode); -EGLAPI EGLBoolean EGLAPIENTRY eglScreenPositionMESA(EGLDisplay dpy, EGLScreenMESA screen, EGLint x, EGLint y); -EGLAPI EGLBoolean EGLAPIENTRY eglQueryScreenMESA(EGLDisplay dpy, EGLScreenMESA screen, EGLint attribute, EGLint *value); -EGLAPI EGLBoolean EGLAPIENTRY eglQueryScreenSurfaceMESA(EGLDisplay dpy, EGLScreenMESA screen, EGLSurface *surface); -EGLAPI EGLBoolean EGLAPIENTRY eglQueryScreenModeMESA(EGLDisplay dpy, EGLScreenMESA screen, EGLModeMESA *mode); -EGLAPI const char * EGLAPIENTRY eglQueryModeStringMESA(EGLDisplay dpy, EGLModeMESA mode); -#endif /* EGL_EGLEXT_PROTOTYPES */ - -typedef EGLBoolean (EGLAPIENTRYP PFNEGLCHOOSEMODEMESA) (EGLDisplay dpy, EGLScreenMESA screen, const EGLint *attrib_list, EGLModeMESA *modes, EGLint modes_size, EGLint *num_modes); -typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETMODESMESA) (EGLDisplay dpy, EGLScreenMESA screen, EGLModeMESA *modes, EGLint modes_size, EGLint *num_modes); -typedef EGLBoolean (EGLAPIENTRYP PFNEGLGetModeATTRIBMESA) (EGLDisplay dpy, EGLModeMESA mode, EGLint attribute, EGLint *value); -typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSCRREENSMESA) (EGLDisplay dpy, EGLScreenMESA *screens, EGLint max_screens, EGLint *num_screens); -typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATESCREENSURFACEMESA) (EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list); -typedef EGLBoolean (EGLAPIENTRYP PFNEGLSHOWSCREENSURFACEMESA) (EGLDisplay dpy, EGLint screen, EGLSurface surface, EGLModeMESA mode); -typedef EGLBoolean (EGLAPIENTRYP PFNEGLSCREENPOSIITONMESA) (EGLDisplay dpy, EGLScreenMESA screen, EGLint x, EGLint 
y); -typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSCREENMESA) (EGLDisplay dpy, EGLScreenMESA screen, EGLint attribute, EGLint *value); -typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSCREENSURFACEMESA) (EGLDisplay dpy, EGLScreenMESA screen, EGLSurface *surface); -typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSCREENMODEMESA) (EGLDisplay dpy, EGLScreenMESA screen, EGLModeMESA *mode); -typedef const char * (EGLAPIENTRYP PFNEGLQUERYMODESTRINGMESA) (EGLDisplay dpy, EGLModeMESA mode); - -#endif /* EGL_MESA_screen_surface */ - #ifndef EGL_MESA_copy_context #define EGL_MESA_copy_context 1 diff --git a/src/egl/main/Makefile.sources b/src/egl/main/Makefile.sources index 304c7731c8c..e39a80f14a6 100644 --- a/src/egl/main/Makefile.sources +++ b/src/egl/main/Makefile.sources @@ -22,10 +22,6 @@ LIBEGL_C_FILES := \ eglimage.h \ egllog.c \ egllog.h \ - eglmode.c \ - eglmode.h \ - eglscreen.c \ - eglscreen.h \ eglstring.c \ eglstring.h \ eglsurface.c \ diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index ba1d0ddc975..3f02c5c5539 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -98,8 +98,6 @@ #include "egldriver.h" #include "eglsurface.h" #include "eglconfig.h" -#include "eglscreen.h" -#include "eglmode.h" #include "eglimage.h" #include "eglsync.h" #include "eglstring.h" @@ -155,12 +153,6 @@ #define _EGL_CHECK_CONFIG(disp, conf, ret, drv) \ _EGL_CHECK_OBJECT(disp, Config, conf, ret, drv) -#define _EGL_CHECK_SCREEN(disp, scrn, ret, drv) \ - _EGL_CHECK_OBJECT(disp, Screen, scrn, ret, drv) - -#define _EGL_CHECK_MODE(disp, m, ret, drv) \ - _EGL_CHECK_OBJECT(disp, Mode, m, ret, drv) - #define _EGL_CHECK_SYNC(disp, s, ret, drv) \ _EGL_CHECK_OBJECT(disp, Sync, s, ret, drv) @@ -236,40 +228,6 @@ _eglCheckSync(_EGLDisplay *disp, _EGLSync *s, const char *msg) } -#ifdef EGL_MESA_screen_surface - - -static inline _EGLDriver * -_eglCheckScreen(_EGLDisplay *disp, _EGLScreen *scrn, const char *msg) -{ - _EGLDriver *drv = _eglCheckDisplay(disp, msg); - if (!drv) - return NULL; 
- if (!scrn) { - _eglError(EGL_BAD_SCREEN_MESA, msg); - return NULL; - } - return drv; -} - - -static inline _EGLDriver * -_eglCheckMode(_EGLDisplay *disp, _EGLMode *m, const char *msg) -{ - _EGLDriver *drv = _eglCheckDisplay(disp, msg); - if (!drv) - return NULL; - if (!m) { - _eglError(EGL_BAD_MODE_MESA, msg); - return NULL; - } - return drv; -} - - -#endif /* EGL_MESA_screen_surface */ - - /** * Lookup and lock a display. */ @@ -383,7 +341,6 @@ _eglCreateExtensionsString(_EGLDisplay *dpy) char *exts = dpy->ExtensionsString; - _EGL_CHECK_EXTENSION(MESA_screen_surface); _EGL_CHECK_EXTENSION(MESA_copy_context); _EGL_CHECK_EXTENSION(MESA_drm_display); _EGL_CHECK_EXTENSION(MESA_drm_image); @@ -1196,20 +1153,6 @@ eglGetProcAddress(const char *procname) { "eglWaitGL", (_EGLProc) eglWaitGL }, { "eglWaitNative", (_EGLProc) eglWaitNative }, #endif /* _EGL_GET_CORE_ADDRESSES */ -#ifdef EGL_MESA_screen_surface - { "eglChooseModeMESA", (_EGLProc) eglChooseModeMESA }, - { "eglGetModesMESA", (_EGLProc) eglGetModesMESA }, - { "eglGetModeAttribMESA", (_EGLProc) eglGetModeAttribMESA }, - { "eglCopyContextMESA", (_EGLProc) eglCopyContextMESA }, - { "eglGetScreensMESA", (_EGLProc) eglGetScreensMESA }, - { "eglCreateScreenSurfaceMESA", (_EGLProc) eglCreateScreenSurfaceMESA }, - { "eglShowScreenSurfaceMESA", (_EGLProc) eglShowScreenSurfaceMESA }, - { "eglScreenPositionMESA", (_EGLProc) eglScreenPositionMESA }, - { "eglQueryScreenMESA", (_EGLProc) eglQueryScreenMESA }, - { "eglQueryScreenSurfaceMESA", (_EGLProc) eglQueryScreenSurfaceMESA }, - { "eglQueryScreenModeMESA", (_EGLProc) eglQueryScreenModeMESA }, - { "eglQueryModeStringMESA", (_EGLProc) eglQueryModeStringMESA }, -#endif /* EGL_MESA_screen_surface */ #ifdef EGL_MESA_drm_display { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA }, #endif @@ -1273,227 +1216,6 @@ eglGetProcAddress(const char *procname) } -#ifdef EGL_MESA_screen_surface - - -/* - * EGL_MESA_screen extension - */ - -EGLBoolean EGLAPIENTRY 
-eglChooseModeMESA(EGLDisplay dpy, EGLScreenMESA screen, - const EGLint *attrib_list, EGLModeMESA *modes, - EGLint modes_size, EGLint *num_modes) -{ - _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLScreen *scrn = _eglLookupScreen(screen, disp); - _EGLDriver *drv; - EGLBoolean ret; - - _EGL_CHECK_SCREEN(disp, scrn, EGL_FALSE, drv); - ret = drv->API.ChooseModeMESA(drv, disp, scrn, attrib_list, - modes, modes_size, num_modes); - - RETURN_EGL_EVAL(disp, ret); -} - - -EGLBoolean EGLAPIENTRY -eglGetModesMESA(EGLDisplay dpy, EGLScreenMESA screen, EGLModeMESA *modes, - EGLint mode_size, EGLint *num_mode) -{ - _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLScreen *scrn = _eglLookupScreen(screen, disp); - _EGLDriver *drv; - EGLBoolean ret; - - _EGL_CHECK_SCREEN(disp, scrn, EGL_FALSE, drv); - ret = drv->API.GetModesMESA(drv, disp, scrn, modes, mode_size, num_mode); - - RETURN_EGL_EVAL(disp, ret); -} - - -EGLBoolean EGLAPIENTRY -eglGetModeAttribMESA(EGLDisplay dpy, EGLModeMESA mode, - EGLint attribute, EGLint *value) -{ - _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLMode *m = _eglLookupMode(mode, disp); - _EGLDriver *drv; - EGLBoolean ret; - - _EGL_CHECK_MODE(disp, m, EGL_FALSE, drv); - ret = drv->API.GetModeAttribMESA(drv, disp, m, attribute, value); - - RETURN_EGL_EVAL(disp, ret); -} - - -EGLBoolean EGLAPIENTRY -eglCopyContextMESA(EGLDisplay dpy, EGLContext source, EGLContext dest, - EGLint mask) -{ - _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLContext *source_context = _eglLookupContext(source, disp); - _EGLContext *dest_context = _eglLookupContext(dest, disp); - _EGLDriver *drv; - EGLBoolean ret; - - _EGL_CHECK_CONTEXT(disp, source_context, EGL_FALSE, drv); - if (!dest_context) - RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_FALSE); - - ret = drv->API.CopyContextMESA(drv, disp, - source_context, dest_context, mask); - - RETURN_EGL_EVAL(disp, ret); -} - - -EGLBoolean EGLAPIENTRY -eglGetScreensMESA(EGLDisplay dpy, EGLScreenMESA *screens, - EGLint max_screens, EGLint 
*num_screens) -{ - _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLDriver *drv; - EGLBoolean ret; - - _EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv); - ret = drv->API.GetScreensMESA(drv, disp, screens, max_screens, num_screens); - - RETURN_EGL_EVAL(disp, ret); -} - - -EGLSurface EGLAPIENTRY -eglCreateScreenSurfaceMESA(EGLDisplay dpy, EGLConfig config, - const EGLint *attrib_list) -{ - _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLConfig *conf = _eglLookupConfig(config, disp); - _EGLDriver *drv; - _EGLSurface *surf; - EGLSurface ret; - - _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv); - - surf = drv->API.CreateScreenSurfaceMESA(drv, disp, conf, attrib_list); - ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE; - - RETURN_EGL_EVAL(disp, ret); -} - - -EGLBoolean EGLAPIENTRY -eglShowScreenSurfaceMESA(EGLDisplay dpy, EGLint screen, - EGLSurface surface, EGLModeMESA mode) -{ - _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLScreen *scrn = _eglLookupScreen((EGLScreenMESA) screen, disp); - _EGLSurface *surf = _eglLookupSurface(surface, disp); - _EGLMode *m = _eglLookupMode(mode, disp); - _EGLDriver *drv; - EGLBoolean ret; - - _EGL_CHECK_SCREEN(disp, scrn, EGL_FALSE, drv); - if (!surf && surface != EGL_NO_SURFACE) - RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE); - if (!m && mode != EGL_NO_MODE_MESA) - RETURN_EGL_ERROR(disp, EGL_BAD_MODE_MESA, EGL_FALSE); - - ret = drv->API.ShowScreenSurfaceMESA(drv, disp, scrn, surf, m); - - RETURN_EGL_EVAL(disp, ret); -} - - -EGLBoolean EGLAPIENTRY -eglScreenPositionMESA(EGLDisplay dpy, EGLScreenMESA screen, EGLint x, EGLint y) -{ - _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLScreen *scrn = _eglLookupScreen(screen, disp); - _EGLDriver *drv; - EGLBoolean ret; - - _EGL_CHECK_SCREEN(disp, scrn, EGL_FALSE, drv); - ret = drv->API.ScreenPositionMESA(drv, disp, scrn, x, y); - - RETURN_EGL_EVAL(disp, ret); -} - - -EGLBoolean EGLAPIENTRY -eglQueryScreenMESA(EGLDisplay dpy, EGLScreenMESA screen, - EGLint attribute, EGLint *value) -{ - 
_EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLScreen *scrn = _eglLookupScreen(screen, disp); - _EGLDriver *drv; - EGLBoolean ret; - - _EGL_CHECK_SCREEN(disp, scrn, EGL_FALSE, drv); - ret = drv->API.QueryScreenMESA(drv, disp, scrn, attribute, value); - - RETURN_EGL_EVAL(disp, ret); -} - - -EGLBoolean EGLAPIENTRY -eglQueryScreenSurfaceMESA(EGLDisplay dpy, EGLScreenMESA screen, - EGLSurface *surface) -{ - _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLScreen *scrn = _eglLookupScreen((EGLScreenMESA) screen, disp); - _EGLDriver *drv; - _EGLSurface *surf; - EGLBoolean ret; - - _EGL_CHECK_SCREEN(disp, scrn, EGL_FALSE, drv); - ret = drv->API.QueryScreenSurfaceMESA(drv, disp, scrn, &surf); - if (ret && surface) - *surface = _eglGetSurfaceHandle(surf); - - RETURN_EGL_EVAL(disp, ret); -} - - -EGLBoolean EGLAPIENTRY -eglQueryScreenModeMESA(EGLDisplay dpy, EGLScreenMESA screen, EGLModeMESA *mode) -{ - _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLScreen *scrn = _eglLookupScreen((EGLScreenMESA) screen, disp); - _EGLDriver *drv; - _EGLMode *m; - EGLBoolean ret; - - _EGL_CHECK_SCREEN(disp, scrn, EGL_FALSE, drv); - ret = drv->API.QueryScreenModeMESA(drv, disp, scrn, &m); - if (ret && mode) - *mode = m->Handle; - - RETURN_EGL_EVAL(disp, ret); -} - - -const char * EGLAPIENTRY -eglQueryModeStringMESA(EGLDisplay dpy, EGLModeMESA mode) -{ - _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGLMode *m = _eglLookupMode(mode, disp); - _EGLDriver *drv; - const char *ret; - - _EGL_CHECK_MODE(disp, m, NULL, drv); - ret = drv->API.QueryModeStringMESA(drv, disp, m); - - RETURN_EGL_EVAL(disp, ret); -} - - -#endif /* EGL_MESA_screen_surface */ - - #ifdef EGL_MESA_drm_display EGLDisplay EGLAPIENTRY diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h index 068d4ef5c1e..066a416b3e3 100644 --- a/src/egl/main/eglapi.h +++ b/src/egl/main/eglapi.h @@ -79,22 +79,6 @@ typedef _EGLProc (*GetProcAddress_t)(_EGLDriver *drv, const char *procname); -#ifdef EGL_MESA_screen_surface -typedef EGLBoolean 
(*ChooseModeMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *screen, const EGLint *attrib_list, EGLModeMESA *modes, EGLint modes_size, EGLint *num_modes); -typedef EGLBoolean (*GetModesMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *screen, EGLModeMESA *modes, EGLint mode_size, EGLint *num_mode); -typedef EGLBoolean (*GetModeAttribMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLMode *mode, EGLint attribute, EGLint *value); -typedef EGLBoolean (*CopyContextMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *source, _EGLContext *dest, EGLint mask); -typedef EGLBoolean (*GetScreensMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLScreenMESA *screens, EGLint max_screens, EGLint *num_screens); -typedef _EGLSurface *(*CreateScreenSurfaceMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *config, const EGLint *attrib_list); -typedef EGLBoolean (*ShowScreenSurfaceMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *screen, _EGLSurface *surface, _EGLMode *mode); -typedef EGLBoolean (*ScreenPositionMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *screen, EGLint x, EGLint y); -typedef EGLBoolean (*QueryScreenMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *screen, EGLint attribute, EGLint *value); -typedef EGLBoolean (*QueryScreenSurfaceMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *screen, _EGLSurface **surface); -typedef EGLBoolean (*QueryScreenModeMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *screen, _EGLMode **mode); -typedef const char * (*QueryModeStringMESA_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLMode *mode); -#endif /* EGL_MESA_screen_surface */ - - typedef _EGLSurface *(*CreatePbufferFromClientBuffer_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLenum buftype, EGLClientBuffer buffer, _EGLConfig *config, const EGLint *attrib_list); @@ -179,21 +163,6 @@ struct _egl_api WaitNative_t WaitNative; GetProcAddress_t GetProcAddress; -#ifdef EGL_MESA_screen_surface - ChooseModeMESA_t ChooseModeMESA; - GetModesMESA_t GetModesMESA; - 
GetModeAttribMESA_t GetModeAttribMESA; - CopyContextMESA_t CopyContextMESA; - GetScreensMESA_t GetScreensMESA; - CreateScreenSurfaceMESA_t CreateScreenSurfaceMESA; - ShowScreenSurfaceMESA_t ShowScreenSurfaceMESA; - ScreenPositionMESA_t ScreenPositionMESA; - QueryScreenMESA_t QueryScreenMESA; - QueryScreenSurfaceMESA_t QueryScreenSurfaceMESA; - QueryScreenModeMESA_t QueryScreenModeMESA; - QueryModeStringMESA_t QueryModeStringMESA; -#endif /* EGL_MESA_screen_surface */ - CreatePbufferFromClientBuffer_t CreatePbufferFromClientBuffer; CreateImageKHR_t CreateImageKHR; diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c index db42e95f88d..cf65c69b7b4 100644 --- a/src/egl/main/eglconfig.c +++ b/src/egl/main/eglconfig.c @@ -323,10 +323,6 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching) EGL_VG_ALPHA_FORMAT_PRE_BIT | EGL_MULTISAMPLE_RESOLVE_BOX_BIT | EGL_SWAP_BEHAVIOR_PRESERVED_BIT; -#ifdef EGL_MESA_screen_surface - if (conf->Display->Extensions.MESA_screen_surface) - mask |= EGL_SCREEN_BIT_MESA; -#endif break; case EGL_RENDERABLE_TYPE: case EGL_CONFORMANT: diff --git a/src/egl/main/eglcurrent.c b/src/egl/main/eglcurrent.c index 6ffc799d3de..835631d3ba3 100644 --- a/src/egl/main/eglcurrent.c +++ b/src/egl/main/eglcurrent.c @@ -282,14 +282,6 @@ _eglError(EGLint errCode, const char *msg) case EGL_NOT_INITIALIZED: s = "EGL_NOT_INITIALIZED"; break; -#ifdef EGL_MESA_screen_surface - case EGL_BAD_SCREEN_MESA: - s = "EGL_BAD_SCREEN_MESA"; - break; - case EGL_BAD_MODE_MESA: - s = "EGL_BAD_MODE_MESA"; - break; -#endif default: s = "other EGL error"; } diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index b6b9ed8e278..36f50b97cb3 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -86,7 +86,6 @@ struct _egl_resource */ struct _egl_extensions { - EGLBoolean MESA_screen_surface; EGLBoolean MESA_copy_context; EGLBoolean MESA_drm_display; EGLBoolean MESA_drm_image; diff --git a/src/egl/main/eglfallbacks.c 
b/src/egl/main/eglfallbacks.c index 83d775610c5..d12b8491ad1 100644 --- a/src/egl/main/eglfallbacks.c +++ b/src/egl/main/eglfallbacks.c @@ -32,8 +32,6 @@ #include "eglconfig.h" #include "eglcontext.h" #include "eglsurface.h" -#include "eglscreen.h" -#include "eglmode.h" #include "eglsync.h" @@ -85,22 +83,6 @@ _eglInitDriverFallbacks(_EGLDriver *drv) drv->API.WaitNative = (WaitNative_t) _eglReturnFalse; drv->API.GetProcAddress = (GetProcAddress_t) _eglReturnFalse; -#ifdef EGL_MESA_screen_surface - drv->API.CopyContextMESA = (CopyContextMESA_t) _eglReturnFalse; - drv->API.CreateScreenSurfaceMESA = - (CreateScreenSurfaceMESA_t) _eglReturnFalse; - drv->API.ShowScreenSurfaceMESA = (ShowScreenSurfaceMESA_t) _eglReturnFalse; - drv->API.ChooseModeMESA = _eglChooseModeMESA; - drv->API.GetModesMESA = _eglGetModesMESA; - drv->API.GetModeAttribMESA = _eglGetModeAttribMESA; - drv->API.GetScreensMESA = _eglGetScreensMESA; - drv->API.ScreenPositionMESA = _eglScreenPositionMESA; - drv->API.QueryScreenMESA = _eglQueryScreenMESA; - drv->API.QueryScreenSurfaceMESA = _eglQueryScreenSurfaceMESA; - drv->API.QueryScreenModeMESA = _eglQueryScreenModeMESA; - drv->API.QueryModeStringMESA = _eglQueryModeStringMESA; -#endif /* EGL_MESA_screen_surface */ - drv->API.CreateImageKHR = NULL; drv->API.DestroyImageKHR = NULL; diff --git a/src/egl/main/eglmode.c b/src/egl/main/eglmode.c deleted file mode 100644 index d248ea45886..00000000000 --- a/src/egl/main/eglmode.c +++ /dev/null @@ -1,357 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * Copyright 2009-2010 Chia-I Wu - * Copyright 2010 LunarG, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include -#include -#include - -#include "egldisplay.h" -#include "eglmode.h" -#include "eglcurrent.h" -#include "eglscreen.h" - - -#ifdef EGL_MESA_screen_surface - - -#define MIN2(A, B) (((A) < (B)) ? (A) : (B)) - - -/** - * Given an EGLModeMESA handle, return the corresponding _EGLMode object - * or null if non-existant. 
- */ -_EGLMode * -_eglLookupMode(EGLModeMESA mode, _EGLDisplay *disp) -{ - EGLint scrnum; - - if (!disp || !disp->Screens) - return NULL; - - /* loop over all screens on the display */ - for (scrnum = 0; scrnum < disp->Screens->Size; scrnum++) { - const _EGLScreen *scrn = disp->Screens->Elements[scrnum]; - EGLint idx; - - /* - * the mode ids of a screen ranges from scrn->Handle to scrn->Handle + - * scrn->NumModes - */ - if (mode >= scrn->Handle && - mode < scrn->Handle + _EGL_SCREEN_MAX_MODES) { - idx = mode - scrn->Handle; - - assert(idx < scrn->NumModes && scrn->Modes[idx].Handle == mode); - - return &scrn->Modes[idx]; - } - } - - return NULL; -} - - -/** - * Parse the attrib_list to fill in the fields of the given _eglMode - * Return EGL_FALSE if any errors, EGL_TRUE otherwise. - */ -static EGLBoolean -_eglParseModeAttribs(_EGLMode *mode, const EGLint *attrib_list) -{ - EGLint i; - - /* init all attribs to EGL_DONT_CARE */ - mode->Handle = EGL_DONT_CARE; - mode->Width = EGL_DONT_CARE; - mode->Height = EGL_DONT_CARE; - mode->RefreshRate = EGL_DONT_CARE; - mode->Optimal = EGL_DONT_CARE; - mode->Interlaced = EGL_DONT_CARE; - mode->Name = NULL; - - for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) { - switch (attrib_list[i]) { - case EGL_MODE_ID_MESA: - mode->Handle = attrib_list[++i]; - if (mode->Handle <= 0) { - _eglError(EGL_BAD_PARAMETER, "eglChooseModeMESA(handle)"); - return EGL_FALSE; - } - break; - case EGL_WIDTH: - mode->Width = attrib_list[++i]; - if (mode->Width <= 0) { - _eglError(EGL_BAD_PARAMETER, "eglChooseModeMESA(width)"); - return EGL_FALSE; - } - break; - case EGL_HEIGHT: - mode->Height = attrib_list[++i]; - if (mode->Height <= 0) { - _eglError(EGL_BAD_PARAMETER, "eglChooseModeMESA(height)"); - return EGL_FALSE; - } - break; - case EGL_REFRESH_RATE_MESA: - mode->RefreshRate = attrib_list[++i]; - if (mode->RefreshRate <= 0) { - _eglError(EGL_BAD_PARAMETER, "eglChooseModeMESA(refresh rate)"); - return EGL_FALSE; - } - break; - case 
EGL_INTERLACED_MESA: - mode->Interlaced = attrib_list[++i]; - if (mode->Interlaced != EGL_TRUE && mode->Interlaced != EGL_FALSE) { - _eglError(EGL_BAD_PARAMETER, "eglChooseModeMESA(interlaced)"); - return EGL_FALSE; - } - break; - case EGL_OPTIMAL_MESA: - mode->Optimal = attrib_list[++i]; - if (mode->Optimal != EGL_TRUE && mode->Optimal != EGL_FALSE) { - _eglError(EGL_BAD_PARAMETER, "eglChooseModeMESA(optimal)"); - return EGL_FALSE; - } - break; - default: - _eglError(EGL_BAD_ATTRIBUTE, "eglChooseModeMESA"); - return EGL_FALSE; - } - } - return EGL_TRUE; -} - - -/** - * Determine if the candidate mode's attributes are at least as good - * as the minimal mode's. - * \return EGL_TRUE if qualifies, EGL_FALSE otherwise - */ -static EGLBoolean -_eglModeQualifies(const _EGLMode *c, const _EGLMode *min) -{ - if (min->Handle != EGL_DONT_CARE && c->Handle != min->Handle) - return EGL_FALSE; - if (min->Width != EGL_DONT_CARE && c->Width < min->Width) - return EGL_FALSE; - if (min->Height != EGL_DONT_CARE && c->Height < min->Height) - return EGL_FALSE; - if (min->RefreshRate != EGL_DONT_CARE && c->RefreshRate < min->RefreshRate) - return EGL_FALSE; - if (min->Optimal != EGL_DONT_CARE && c->Optimal != min->Optimal) - return EGL_FALSE; - if (min->Interlaced != EGL_DONT_CARE && c->Interlaced != min->Interlaced) - return EGL_FALSE; - - return EGL_TRUE; -} - - -/** - * Return value of given mode attribute, or -1 if bad attrib. 
- */ -static EGLint -getModeAttrib(const _EGLMode *m, EGLint attrib) -{ - switch (attrib) { - case EGL_MODE_ID_MESA: - return m->Handle; - case EGL_WIDTH: - return m->Width; - case EGL_HEIGHT: - return m->Height; - case EGL_REFRESH_RATE_MESA: - return m->RefreshRate; - case EGL_OPTIMAL_MESA: - return m->Optimal; - case EGL_INTERLACED_MESA: - return m->Interlaced; - default: - return -1; - } -} - - -#define SMALLER 1 -#define LARGER 2 - -struct sort_info { - EGLint Attrib; - EGLint Order; /* SMALLER or LARGER */ -}; - -/* the order of these entries is the priority */ -static struct sort_info SortInfo[] = { - { EGL_OPTIMAL_MESA, LARGER }, - { EGL_INTERLACED_MESA, SMALLER }, - { EGL_WIDTH, LARGER }, - { EGL_HEIGHT, LARGER }, - { EGL_REFRESH_RATE_MESA, LARGER }, - { EGL_MODE_ID_MESA, SMALLER }, - { 0, 0 } -}; - - -/** - * Compare modes 'a' and 'b' and return -1 if a belongs before b, or 1 if a - * belongs after b, or 0 if they're equal. - * Used by qsort(). - */ -static int -_eglCompareModes(const void *a, const void *b) -{ - const _EGLMode *aMode = *((const _EGLMode **) a); - const _EGLMode *bMode = *((const _EGLMode **) b); - EGLint i; - - for (i = 0; SortInfo[i].Attrib; i++) { - const EGLint aVal = getModeAttrib(aMode, SortInfo[i].Attrib); - const EGLint bVal = getModeAttrib(bMode, SortInfo[i].Attrib); - if (aVal == bVal) { - /* a tie */ - continue; - } - else if (SortInfo[i].Order == SMALLER) { - return (aVal < bVal) ? -1 : 1; - } - else if (SortInfo[i].Order == LARGER) { - return (aVal > bVal) ? -1 : 1; - } - } - - /* all attributes identical */ - return 0; -} - - -/** - * Search for EGLModes which match the given attribute list. - * Called via eglChooseModeMESA API function. 
- */ -EGLBoolean -_eglChooseModeMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *scrn, - const EGLint *attrib_list, EGLModeMESA *modes, - EGLint modes_size, EGLint *num_modes) -{ - _EGLMode **modeList, min; - EGLint i, count; - - if (!_eglParseModeAttribs(&min, attrib_list)) { - /* error code will have been recorded */ - return EGL_FALSE; - } - - /* allocate array of mode pointers */ - modeList = malloc(modes_size * sizeof(_EGLMode *)); - if (!modeList) { - _eglError(EGL_BAD_MODE_MESA, "eglChooseModeMESA(out of memory)"); - return EGL_FALSE; - } - - /* make array of pointers to qualifying modes */ - for (i = count = 0; i < scrn->NumModes && count < modes_size; i++) { - if (_eglModeQualifies(scrn->Modes + i, &min)) { - modeList[count++] = scrn->Modes + i; - } - } - - /* sort array of pointers */ - qsort(modeList, count, sizeof(_EGLMode *), _eglCompareModes); - - /* copy mode handles to output array */ - for (i = 0; i < count; i++) { - modes[i] = modeList[i]->Handle; - } - - free(modeList); - - *num_modes = count; - - return EGL_TRUE; -} - - - -/** - * Return all possible modes for the given screen. No sorting of results. - * Called via eglGetModesMESA() API function. - */ -EGLBoolean -_eglGetModesMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *scrn, - EGLModeMESA *modes, EGLint modes_size, EGLint *num_modes) -{ - if (modes) { - EGLint i; - *num_modes = MIN2(scrn->NumModes, modes_size); - for (i = 0; i < *num_modes; i++) { - modes[i] = scrn->Modes[i].Handle; - } - } - else { - /* just return total number of supported modes */ - *num_modes = scrn->NumModes; - } - - return EGL_TRUE; -} - - -/** - * Query an attribute of a mode. 
- */ -EGLBoolean -_eglGetModeAttribMESA(_EGLDriver *drv, _EGLDisplay *dpy, - _EGLMode *m, EGLint attribute, EGLint *value) -{ - EGLint v; - - v = getModeAttrib(m, attribute); - if (v < 0) { - _eglError(EGL_BAD_ATTRIBUTE, "eglGetModeAttribMESA"); - return EGL_FALSE; - } - *value = v; - return EGL_TRUE; -} - - -/** - * Return human-readable string for given mode. - * This is the default function called by eglQueryModeStringMESA(). - */ -const char * -_eglQueryModeStringMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLMode *m) -{ - return m->Name; -} - - -#endif /* EGL_MESA_screen_surface */ diff --git a/src/egl/main/eglmode.h b/src/egl/main/eglmode.h deleted file mode 100644 index 664074fedee..00000000000 --- a/src/egl/main/eglmode.h +++ /dev/null @@ -1,88 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * Copyright 2009-2010 Chia-I Wu - * Copyright 2010 LunarG, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef EGLMODE_INCLUDED -#define EGLMODE_INCLUDED - -#include "egltypedefs.h" - - -#ifdef EGL_MESA_screen_surface - - -#define EGL_NO_MODE_MESA 0 - - -/** - * Data structure which corresponds to an EGLModeMESA. - */ -struct _egl_mode -{ - EGLModeMESA Handle; /* the public/opaque handle which names this mode */ - EGLint Width, Height; /* size in pixels */ - EGLint RefreshRate; /* rate * 1000.0 */ - EGLint Optimal; - EGLint Interlaced; - const char *Name; - - /* Other possible attributes */ - /* interlaced */ - /* external sync */ -}; - - -extern _EGLMode * -_eglLookupMode(EGLModeMESA mode, _EGLDisplay *dpy); - - -extern EGLBoolean -_eglChooseModeMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *scrn, - const EGLint *attrib_list, EGLModeMESA *modes, - EGLint modes_size, EGLint *num_modes); - - -extern EGLBoolean -_eglGetModesMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *scrn, - EGLModeMESA *modes, EGLint modes_size, EGLint *num_modes); - - -extern EGLBoolean -_eglGetModeAttribMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLMode *m, - EGLint attribute, EGLint *value); - - -extern const char * -_eglQueryModeStringMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLMode *m); - - -#endif /* EGL_MESA_screen_surface */ - - -#endif /* EGLMODE_INCLUDED */ diff --git a/src/egl/main/eglscreen.c b/src/egl/main/eglscreen.c deleted file mode 100644 index 42ac621fcd9..00000000000 --- a/src/egl/main/eglscreen.c +++ /dev/null @@ -1,235 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * Copyright 2009-2010 Chia-I Wu - * Copyright 2010 LunarG, Inc. 
- * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/* - * Ideas for screen management extension to EGL. - * - * Each EGLDisplay has one or more screens (CRTs, Flat Panels, etc). - * The screens' handles can be obtained with eglGetScreensMESA(). - * - * A new kind of EGLSurface is possible- one which can be directly scanned - * out on a screen. Such a surface is created with eglCreateScreenSurface(). - * - * To actually display a screen surface on a screen, the eglShowSurface() - * function is called. - */ - -#include -#include -#include -#include "c11/threads.h" - -#include "egldisplay.h" -#include "eglcurrent.h" -#include "eglmode.h" -#include "eglsurface.h" -#include "eglscreen.h" - - -#ifdef EGL_MESA_screen_surface - - -/* ugh, no atomic op? 
*/ -static mtx_t _eglNextScreenHandleMutex = _MTX_INITIALIZER_NP; -static EGLScreenMESA _eglNextScreenHandle = 1; - - -/** - * Return a new screen handle/ID. - * NOTE: we never reuse these! - */ -static EGLScreenMESA -_eglAllocScreenHandle(void) -{ - EGLScreenMESA s; - - mtx_lock(&_eglNextScreenHandleMutex); - s = _eglNextScreenHandle; - _eglNextScreenHandle += _EGL_SCREEN_MAX_MODES; - mtx_unlock(&_eglNextScreenHandleMutex); - - return s; -} - - -/** - * Initialize an _EGLScreen object to default values. - */ -void -_eglInitScreen(_EGLScreen *screen, _EGLDisplay *dpy, EGLint num_modes) -{ - memset(screen, 0, sizeof(_EGLScreen)); - - screen->Display = dpy; - screen->NumModes = num_modes; - screen->StepX = 1; - screen->StepY = 1; - - if (num_modes > _EGL_SCREEN_MAX_MODES) - num_modes = _EGL_SCREEN_MAX_MODES; - screen->Modes = calloc(num_modes, sizeof(*screen->Modes)); - screen->NumModes = (screen->Modes) ? num_modes : 0; -} - - -/** - * Link a screen to its display and return the handle of the link. - * The handle can be passed to client directly. - */ -EGLScreenMESA -_eglLinkScreen(_EGLScreen *screen) -{ - _EGLDisplay *display; - EGLint i; - - assert(screen && screen->Display); - display = screen->Display; - - if (!display->Screens) { - display->Screens = _eglCreateArray("Screen", 4); - if (!display->Screens) - return (EGLScreenMESA) 0; - } - - screen->Handle = _eglAllocScreenHandle(); - for (i = 0; i < screen->NumModes; i++) - screen->Modes[i].Handle = screen->Handle + i; - - _eglAppendArray(display->Screens, (void *) screen); - - return screen->Handle; -} - - -/** - * Lookup a handle to find the linked config. - * Return NULL if the handle has no corresponding linked config. 
- */ -_EGLScreen * -_eglLookupScreen(EGLScreenMESA screen, _EGLDisplay *display) -{ - EGLint i; - - if (!display || !display->Screens) - return NULL; - - for (i = 0; i < display->Screens->Size; i++) { - _EGLScreen *scr = (_EGLScreen *) display->Screens->Elements[i]; - if (scr->Handle == screen) { - assert(scr->Display == display); - return scr; - } - } - return NULL; -} - - -static EGLBoolean -_eglFlattenScreen(void *elem, void *buffer) -{ - _EGLScreen *scr = (_EGLScreen *) elem; - EGLScreenMESA *handle = (EGLScreenMESA *) buffer; - *handle = _eglGetScreenHandle(scr); - return EGL_TRUE; -} - - -EGLBoolean -_eglGetScreensMESA(_EGLDriver *drv, _EGLDisplay *display, EGLScreenMESA *screens, - EGLint max_screens, EGLint *num_screens) -{ - *num_screens = _eglFlattenArray(display->Screens, (void *) screens, - sizeof(screens[0]), max_screens, _eglFlattenScreen); - - return EGL_TRUE; -} - - -/** - * Set a screen's surface origin. - */ -EGLBoolean -_eglScreenPositionMESA(_EGLDriver *drv, _EGLDisplay *dpy, - _EGLScreen *scrn, EGLint x, EGLint y) -{ - scrn->OriginX = x; - scrn->OriginY = y; - - return EGL_TRUE; -} - - -/** - * Query a screen's current surface. - */ -EGLBoolean -_eglQueryScreenSurfaceMESA(_EGLDriver *drv, _EGLDisplay *dpy, - _EGLScreen *scrn, _EGLSurface **surf) -{ - *surf = scrn->CurrentSurface; - return EGL_TRUE; -} - - -/** - * Query a screen's current mode. 
- */ -EGLBoolean -_eglQueryScreenModeMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *scrn, - _EGLMode **m) -{ - *m = scrn->CurrentMode; - return EGL_TRUE; -} - - -EGLBoolean -_eglQueryScreenMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *scrn, - EGLint attribute, EGLint *value) -{ - switch (attribute) { - case EGL_SCREEN_POSITION_MESA: - value[0] = scrn->OriginX; - value[1] = scrn->OriginY; - break; - case EGL_SCREEN_POSITION_GRANULARITY_MESA: - value[0] = scrn->StepX; - value[1] = scrn->StepY; - break; - default: - _eglError(EGL_BAD_ATTRIBUTE, "eglQueryScreenMESA"); - return EGL_FALSE; - } - - return EGL_TRUE; -} - - -#endif /* EGL_MESA_screen_surface */ diff --git a/src/egl/main/eglscreen.h b/src/egl/main/eglscreen.h deleted file mode 100644 index c554e1d5812..00000000000 --- a/src/egl/main/eglscreen.h +++ /dev/null @@ -1,117 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * Copyright 2009-2010 Chia-I Wu - * Copyright 2010 LunarG, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef EGLSCREEN_INCLUDED -#define EGLSCREEN_INCLUDED - -#include "c99_compat.h" - -#include "egltypedefs.h" - - -#ifdef EGL_MESA_screen_surface - - -#define _EGL_SCREEN_MAX_MODES 16 - - -/** - * Per-screen information. - * Note that an EGL screen doesn't have a size. A screen may be set to - * one of several display modes (width/height/scanrate). The screen - * then displays a drawing surface. The drawing surface must be at least - * as large as the display mode's resolution. If it's larger, the - * OriginX and OriginY fields control what part of the surface is visible - * on the screen. - */ -struct _egl_screen -{ - _EGLDisplay *Display; - - EGLScreenMESA Handle; /* The public/opaque handle which names this object */ - - _EGLMode *CurrentMode; - _EGLSurface *CurrentSurface; - - EGLint OriginX, OriginY; /**< Origin of scan-out region w.r.t. surface */ - EGLint StepX, StepY; /**< Screen position/origin granularity */ - - EGLint NumModes; - _EGLMode *Modes; /**< array [NumModes] */ -}; - - -extern void -_eglInitScreen(_EGLScreen *screen, _EGLDisplay *dpy, EGLint num_modes); - - -extern EGLScreenMESA -_eglLinkScreen(_EGLScreen *screen); - - -extern _EGLScreen * -_eglLookupScreen(EGLScreenMESA screen, _EGLDisplay *dpy); - - -/** - * Return the handle of a linked screen. - */ -static inline EGLScreenMESA -_eglGetScreenHandle(_EGLScreen *screen) -{ - return (screen) ? 
screen->Handle : (EGLScreenMESA) 0; -} - - -extern EGLBoolean -_eglGetScreensMESA(_EGLDriver *drv, _EGLDisplay *dpy, EGLScreenMESA *screens, EGLint max_screens, EGLint *num_screens); - - -extern EGLBoolean -_eglScreenPositionMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *scrn, EGLint x, EGLint y); - - -extern EGLBoolean -_eglQueryScreenSurfaceMESA(_EGLDriver *drv, _EGLDisplay *dpy, - _EGLScreen *scrn, _EGLSurface **surface); - - -extern EGLBoolean -_eglQueryScreenModeMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *scrn, _EGLMode **m); - - -extern EGLBoolean -_eglQueryScreenMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *scrn, EGLint attribute, EGLint *value); - - -#endif /* EGL_MESA_screen_surface */ - - -#endif /* EGLSCREEN_INCLUDED */ diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c index e2cb73b7e91..76c60e940dc 100644 --- a/src/egl/main/eglsurface.c +++ b/src/egl/main/eglsurface.c @@ -61,50 +61,6 @@ _eglClampSwapInterval(_EGLSurface *surf, EGLint interval) } -#ifdef EGL_MESA_screen_surface -static EGLint -_eglParseScreenSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) -{ - EGLint i, err = EGL_SUCCESS; - - if (!attrib_list) - return EGL_SUCCESS; - - for (i = 0; attrib_list[i] != EGL_NONE; i++) { - EGLint attr = attrib_list[i++]; - EGLint val = attrib_list[i]; - - switch (attr) { - case EGL_WIDTH: - if (val < 0) { - err = EGL_BAD_PARAMETER; - break; - } - surf->Width = val; - break; - case EGL_HEIGHT: - if (val < 0) { - err = EGL_BAD_PARAMETER; - break; - } - surf->Height = val; - break; - default: - err = EGL_BAD_ATTRIBUTE; - break; - } - - if (err != EGL_SUCCESS) { - _eglLog(_EGL_WARNING, "bad surface attribute 0x%04x", attr); - break; - } - } - - return err; -} -#endif /* EGL_MESA_screen_surface */ - - /** * Parse the list of surface attributes and return the proper error code. 
*/ @@ -119,11 +75,6 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) if (!attrib_list) return EGL_SUCCESS; -#ifdef EGL_MESA_screen_surface - if (type == EGL_SCREEN_BIT_MESA) - return _eglParseScreenSurfaceAttribList(surf, attrib_list); -#endif - if (dpy->Extensions.NOK_texture_from_pixmap) texture_type |= EGL_PIXMAP_BIT; @@ -297,12 +248,6 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type, case EGL_PBUFFER_BIT: func = "eglCreatePBufferSurface"; break; -#ifdef EGL_MESA_screen_surface - case EGL_SCREEN_BIT_MESA: - func = "eglCreateScreenSurface"; - renderBuffer = EGL_SINGLE_BUFFER; /* XXX correct? */ - break; -#endif default: _eglLog(_EGL_WARNING, "Bad type in _eglInitSurface"); return EGL_FALSE; From 73f4010082cf0fc2fe34c59e2eb5801eed10762b Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 11 May 2015 10:46:59 -0700 Subject: [PATCH 020/834] i965/fs: Add missing initializer in fs_visitor(). --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 80ca1b750f8..78f269ef973 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -4193,7 +4193,7 @@ fs_visitor::fs_visitor(struct brw_context *brw, reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), key(key), prog_data(&prog_data->base), - dispatch_width(dispatch_width) + dispatch_width(dispatch_width), promoted_constants(0) { this->mem_ctx = mem_ctx; init(); From f546902d9597429713c83e2caf6b69856bd7ba4d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 May 2015 13:57:14 +0000 Subject: [PATCH 021/834] clover: Add a mutex to guard queue::queued_events This fixes a potential crash where on a sequence like this: Thread 0: Check if queue is not empty. 
Thread 1: Remove item from queue, making it empty. Thread 0: Do something assuming queue is not empty. CC: 10.5 Reviewed-by: Francisco Jerez --- src/gallium/state_trackers/clover/core/queue.cpp | 2 ++ src/gallium/state_trackers/clover/core/queue.hpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/gallium/state_trackers/clover/core/queue.cpp b/src/gallium/state_trackers/clover/core/queue.cpp index 24f9326662d..87f9dcc6476 100644 --- a/src/gallium/state_trackers/clover/core/queue.cpp +++ b/src/gallium/state_trackers/clover/core/queue.cpp @@ -44,6 +44,7 @@ command_queue::flush() { pipe_screen *screen = device().pipe; pipe_fence_handle *fence = NULL; + std::lock_guard lock(queued_events_mutex); if (!queued_events.empty()) { pipe->flush(pipe, &fence, 0); @@ -69,6 +70,7 @@ command_queue::profiling_enabled() const { void command_queue::sequence(hard_event &ev) { + std::lock_guard lock(queued_events_mutex); if (!queued_events.empty()) queued_events.back()().chain(ev); diff --git a/src/gallium/state_trackers/clover/core/queue.hpp b/src/gallium/state_trackers/clover/core/queue.hpp index b7166e685b7..bddb86c0e4c 100644 --- a/src/gallium/state_trackers/clover/core/queue.hpp +++ b/src/gallium/state_trackers/clover/core/queue.hpp @@ -24,6 +24,7 @@ #define CLOVER_CORE_QUEUE_HPP #include +#include #include "core/object.hpp" #include "core/context.hpp" @@ -69,6 +70,7 @@ namespace clover { cl_command_queue_properties props; pipe_context *pipe; + std::mutex queued_events_mutex; std::deque> queued_events; }; } From 9c4dc98b298c74015f2a7c21571bccf0a5b6cc98 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 26 Mar 2015 19:33:24 +0000 Subject: [PATCH 022/834] clover: Fix a bug with multi-threaded events v2 It was possible for some events never to get triggered if one thread was creating events and another thread was waiting for them. 
This patch consolidates soft_event::wait() and hard_event::wait() into event::wait() so that hard_event objects will now wait for all their dependencies to be submitted before flushing the command queue. v2: - Rename variables - Use mutable variables so we can keep event::wait() const - Open code signalled() call so mutex can be added to signalled without deadlocking. CC: 10.5 Reviewed-by: Francisco Jerez --- src/gallium/state_trackers/clover/core/event.cpp | 15 +++++++++++++-- src/gallium/state_trackers/clover/core/event.hpp | 5 ++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp index 58de8884457..55793031ad1 100644 --- a/src/gallium/state_trackers/clover/core/event.cpp +++ b/src/gallium/state_trackers/clover/core/event.cpp @@ -39,6 +39,7 @@ event::~event() { void event::trigger() { if (!--wait_count) { + cv.notify_all(); action_ok(*this); while (!_chain.empty()) { @@ -73,6 +74,15 @@ event::chain(event &ev) { ev.deps.push_back(*this); } +void +event::wait() const { + for (event &ev : deps) + ev.wait(); + + std::unique_lock lock(mutex); + cv.wait(lock, [=]{ return !wait_count; }); +} + hard_event::hard_event(command_queue &q, cl_command_type command, const ref_vector &deps, action action) : event(q.context(), deps, profile(q, action), [](event &ev){}), @@ -120,6 +130,8 @@ void hard_event::wait() const { pipe_screen *screen = queue()->device().pipe; + event::wait(); + if (status() == CL_QUEUED) queue()->flush(); @@ -207,8 +219,7 @@ soft_event::command() const { void soft_event::wait() const { - for (event &ev : deps) - ev.wait(); + event::wait(); if (status() != CL_COMPLETE) throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); diff --git a/src/gallium/state_trackers/clover/core/event.hpp b/src/gallium/state_trackers/clover/core/event.hpp index d407c80ce25..0914842ad1b 100644 --- a/src/gallium/state_trackers/clover/core/event.hpp +++ 
b/src/gallium/state_trackers/clover/core/event.hpp @@ -23,6 +23,7 @@ #ifndef CLOVER_CORE_EVENT_HPP #define CLOVER_CORE_EVENT_HPP +#include #include #include "core/object.hpp" @@ -68,7 +69,7 @@ namespace clover { virtual cl_int status() const = 0; virtual command_queue *queue() const = 0; virtual cl_command_type command() const = 0; - virtual void wait() const = 0; + virtual void wait() const; virtual struct pipe_fence_handle *fence() const { return NULL; @@ -87,6 +88,8 @@ namespace clover { action action_ok; action action_fail; std::vector> _chain; + mutable std::condition_variable cv; + mutable std::mutex mutex; }; /// From 2b5355c8ab383d86bb6332dd29c417a6a1bc52bd Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 6 May 2015 23:29:33 -0400 Subject: [PATCH 023/834] st/mesa: make sure to create a "clean" bool when doing i2b MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i2b has to work for all integers, not just 1. INEG would not necessarily result with all bits set, which is something that other operations can rely on by e.g. using AND (or INEG for b2i). 
Signed-off-by: Ilia Mirkin Reviewed-by: Jason Ekstrand Reviewed-by: Marek Olšák Reviewed-by: Roland Scheidegger Cc: mesa-stable@lists.freedesktop.org --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 1fea8600a75..f0f2a77d065 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1953,7 +1953,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_i2b: if (native_integers) - emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); + emit(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0)); else emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; From 22aaa746bdbe153effcbba7d5690bd9db880c76f Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 11 May 2015 20:19:33 +0100 Subject: [PATCH 024/834] Add release notes for the 10.5.5 release Signed-off-by: Emil Velikov (cherry picked from commit d88fb4050561a62fa824bec59ffedf2a826c2083) --- docs/relnotes/10.5.5.html | 94 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 docs/relnotes/10.5.5.html diff --git a/docs/relnotes/10.5.5.html b/docs/relnotes/10.5.5.html new file mode 100644 index 00000000000..06743cc1e2a --- /dev/null +++ b/docs/relnotes/10.5.5.html @@ -0,0 +1,94 @@ + + + + + Mesa Release Notes + + + + +
+

The Mesa 3D Graphics Library

+
+ + +
+ +

Mesa 10.5.5 Release Notes / May 11, 2015

+ +

+Mesa 10.5.5 is a bug fix release which fixes bugs found since the 10.5.4 release. +

+

+Mesa 10.5.5 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is only available if requested at context creation +because compatibility contexts are not supported. +

+ + +

SHA256 checksums

+
+TBD
+
+ + +

New features

+

None

+ +

Bug fixes

+ +

This list is likely incomplete.

+ +
    + +
  • Bug 88521 - GLBenchmark 2.7 TRex renders with artifacts on Gen8 with !UXA
  • + +
  • Bug 89455 - [NVC0/Gallium] Unigine Heaven black and white boxes
  • + +
  • Bug 89689 - [Regression] Weston on DRM backend won't start with new version of mesa
  • + +
  • Bug 90130 - gl_PrimitiveId seems to reset at 340
  • + +
+ + +

Changes

+ +

Boyan Ding (1):

+
    +
  • i965: Add XRGB8888 format to intel_screen_make_configs
  • +
+ +

Emil Velikov (3):

+
    +
  • docs: Add sha256 sums for the 10.5.4 release
  • +
  • r300: do not link against libdrm_intel
  • +
  • Update version to 10.5.5
  • +
+ +

Ilia Mirkin (4):

+
    +
  • nvc0/ir: flush denorms to zero in non-compute shaders
  • +
  • gk110/ir: fix set with a register dest to not auto-set the abs flag
  • +
  • nvc0/ir: fix predicated PFETCH emission
  • +
  • nv50/ir: fix asFlow() const helper for OP_JOIN
  • +
+ +

Kenneth Graunke (2):

+
    +
  • i965: Make intel_emit_linear_blit handle Gen8+ alignment restrictions.
  • +
  • i965: Disallow linear blits that are not cacheline aligned.
  • +
+ +

Roland Scheidegger (1):

+
    +
  • draw: fix prim ids when there's no gs
  • +
+ + +
+ + From d4125c41f999a09521effa1579c0964441411a13 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 11 May 2015 22:02:03 +0100 Subject: [PATCH 025/834] docs: Add sha256 sums for the 10.5.5 release Signed-off-by: Emil Velikov (cherry picked from commit 8ee1a1c08b168d7583b806a2f8a2dc2ae28be62a) --- docs/relnotes/10.5.5.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/10.5.5.html b/docs/relnotes/10.5.5.html index 06743cc1e2a..fc8247c0035 100644 --- a/docs/relnotes/10.5.5.html +++ b/docs/relnotes/10.5.5.html @@ -31,7 +31,8 @@ because compatibility contexts are not supported.

SHA256 checksums

-TBD
+c10f00fd792b8290dd51ebcc48a9016c4cafab19ec205423c6fcadfd7f3a59f2  mesa-10.5.5.tar.gz
+4ac4e4ea3414f1cadb1467f2f173f9e56170d31e8674f7953a46f0549d319f28  mesa-10.5.5.tar.xz
 
From 95089bfaebcff449289494267c3461704f48452e Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 11 May 2015 22:07:46 +0100 Subject: [PATCH 026/834] docs: add news item and link release notes for mesa 10.5.5 Signed-off-by: Emil Velikov --- docs/index.html | 6 ++++++ docs/relnotes.html | 1 + 2 files changed, 7 insertions(+) diff --git a/docs/index.html b/docs/index.html index e01790cd79d..325e554df5b 100644 --- a/docs/index.html +++ b/docs/index.html @@ -16,6 +16,12 @@

News

+

May 11, 2015

+

+Mesa 10.5.5 is released. +This is a bug-fix release. +

+

April 24, 2015

Mesa 10.5.4 is released. diff --git a/docs/relnotes.html b/docs/relnotes.html index 7f2e1d851b9..6ba9e5904be 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.

    +
  • 10.5.5 release notes
  • 10.5.4 release notes
  • 10.5.3 release notes
  • 10.5.2 release notes From 971be2b7c9c4459e383059f02d20a35e469b429e Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 12 May 2015 04:48:48 +0200 Subject: [PATCH 027/834] docs/GL3: (trivial) mark some tf extensions as done for softpipe/llvmpipe Those extensions were enabled for ages already. --- docs/GL3.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 7a7c1bd9689..5590fea9f25 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -117,8 +117,8 @@ GL 4.0, GLSL 4.00: GL_ARB_texture_cube_map_array DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_texture_gather DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe) GL_ARB_texture_query_lod DONE (i965, nv50, nvc0, r600, radeonsi) - GL_ARB_transform_feedback2 DONE (i965, nv50, nvc0, r600, radeonsi) - GL_ARB_transform_feedback3 DONE (i965, nv50, nvc0, r600, radeonsi) + GL_ARB_transform_feedback2 DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) + GL_ARB_transform_feedback3 DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL 4.1, GLSL 4.10: @@ -137,7 +137,7 @@ GL 4.2, GLSL 4.20: GL_ARB_compressed_texture_pixel_storage DONE (all drivers) GL_ARB_shader_atomic_counters DONE (i965) GL_ARB_texture_storage DONE (all drivers) - GL_ARB_transform_feedback_instanced DONE (i965, nv50, nvc0, r600, radeonsi) + GL_ARB_transform_feedback_instanced DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_base_instance DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_shader_image_load_store in progress (curro) GL_ARB_conservative_depth DONE (all drivers that support GLSL 1.30) From 8a59f2f26fb7bb036ad524cdec668716664d2a82 Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Fri, 7 Nov 2014 18:20:17 +0000 Subject: [PATCH 028/834] i965: Store the command parser version number in intel_screen In order to detect whether the predicate source registers can be used in a later patch we will need to know the version number 
for the command parser. This patch just adds a member to intel_screen and does an ioctl to get the version. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/intel_screen.c | 7 +++++++ src/mesa/drivers/dri/i965/intel_screen.h | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index dda16389f8a..896a12534e6 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1407,6 +1407,13 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) (ret != -1 || errno != EINVAL); } + struct drm_i915_getparam getparam; + getparam.param = I915_PARAM_CMD_PARSER_VERSION; + getparam.value = &intelScreen->cmd_parser_version; + const int ret = drmIoctl(psp->fd, DRM_IOCTL_I915_GETPARAM, &getparam); + if (ret == -1) + intelScreen->cmd_parser_version = 0; + psp->extensions = !intelScreen->has_context_reset_notification ? intelScreenExtensions : intelRobustScreenExtensions; diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index e7a14903d6e..742b3d30eee 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -72,7 +72,13 @@ struct intel_screen * Configuration cache with default values for all contexts */ driOptionCache optionCache; -}; + + /** + * Version of the command parser reported by the + * I915_PARAM_CMD_PARSER_VERSION parameter + */ + int cmd_parser_version; + }; extern void intelDestroyContext(__DRIcontext * driContextPriv); From 9585879d46fe412cbcfd50510e59e8ffe85b055f Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Mon, 11 May 2015 14:00:42 +0100 Subject: [PATCH 029/834] i956: Add a function to load a 64-bit register from a buffer Adds brw_load_register_mem64 which is similar to brw_load_register_mem except that it queues two GEN7_MI_LOAD_REGISTER_MEM commands in order to load both halves of a 64-bit register. 
The function is implemented by splitting the 32-bit version into an internal helper function which takes a size. This will later be used to set the 64-bit predicate source registers. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 5 ++ src/mesa/drivers/dri/i965/intel_batchbuffer.c | 59 ++++++++++++++----- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 834aaa45737..c794fa43973 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1606,6 +1606,11 @@ void brw_load_register_mem(struct brw_context *brw, drm_intel_bo *bo, uint32_t read_domains, uint32_t write_domain, uint32_t offset); +void brw_load_register_mem64(struct brw_context *brw, + uint32_t reg, + drm_intel_bo *bo, + uint32_t read_domains, uint32_t write_domain, + uint32_t offset); /*====================================================================== * brw_state_dump.c diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index e522e4e9c1d..ed659ed625e 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -743,6 +743,38 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw) brw_render_cache_set_clear(brw); } +static void +load_sized_register_mem(struct brw_context *brw, + uint32_t reg, + drm_intel_bo *bo, + uint32_t read_domains, uint32_t write_domain, + uint32_t offset, + int size) +{ + int i; + + /* MI_LOAD_REGISTER_MEM only exists on Gen7+. 
*/ + assert(brw->gen >= 7); + + if (brw->gen >= 8) { + BEGIN_BATCH(4 * size); + for (i = 0; i < size; i++) { + OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (4 - 2)); + OUT_BATCH(reg + i * 4); + OUT_RELOC64(bo, read_domains, write_domain, offset + i * 4); + } + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(3 * size); + for (i = 0; i < size; i++) { + OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); + OUT_BATCH(reg + i * 4); + OUT_RELOC(bo, read_domains, write_domain, offset + i * 4); + } + ADVANCE_BATCH(); + } +} + void brw_load_register_mem(struct brw_context *brw, uint32_t reg, @@ -750,20 +782,15 @@ brw_load_register_mem(struct brw_context *brw, uint32_t read_domains, uint32_t write_domain, uint32_t offset) { - /* MI_LOAD_REGISTER_MEM only exists on Gen7+. */ - assert(brw->gen >= 7); - - if (brw->gen >= 8) { - BEGIN_BATCH(4); - OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (4 - 2)); - OUT_BATCH(reg); - OUT_RELOC64(bo, read_domains, write_domain, offset); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(3); - OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); - OUT_BATCH(reg); - OUT_RELOC(bo, read_domains, write_domain, offset); - ADVANCE_BATCH(); - } + load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 1); +} + +void +brw_load_register_mem64(struct brw_context *brw, + uint32_t reg, + drm_intel_bo *bo, + uint32_t read_domains, uint32_t write_domain, + uint32_t offset) +{ + load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 2); } From 426023050d1d3cd1b5fc0b3508dd7e1ee3b061e7 Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Tue, 4 Nov 2014 19:15:00 +0000 Subject: [PATCH 030/834] i965: Use predicate enable bit for conditional rendering w/o stalling Previously whenever a primitive is drawn the driver would call _mesa_check_conditional_render which blocks waiting for the result of the query to determine whether to render. On Gen7+ there is a bit in the 3DPRIMITIVE command which can be used to disable the primitive based on the value of a state bit. 
This state bit can be set based on whether two registers have different values using the MI_PREDICATE command. We can load these two registers with the pixel count values stored in the query begin and end to implement conditional rendering without stalling. Unfortunately these two source registers were not in the whitelist of available registers in the kernel driver until v3.19. This patch uses the command parser version from intel_screen to detect whether to attempt to set the predicate data registers. The predicate enable bit is currently only used for drawing 3D primitives. For blits, clears, bitmaps, copypixels and drawpixels it still causes a stall. For most of these it would probably just work to call the new brw_check_conditional_render function instead of _mesa_check_conditional_render because they already work in terms of rendering primitives. However it's a bit trickier for blits because it can use the BLT ring or the blorp codepath. I think these operations are less useful for conditional rendering than rendering primitives so it might be best to leave it for a later patch. v2: Use the command parser version to detect whether we can write to the predicate data registers instead of trying to execute a register load command. v3: Simple rebase v4: Changes suggested by Kenneth Graunke: Split the load_64bit_register function out to a separate patch so it can be a shared public function. Avoid calling _mesa_check_conditional_render if we've already determined that there's no query object. Some styling fixes. 
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + .../drivers/dri/i965/brw_conditional_render.c | 161 ++++++++++++++++++ src/mesa/drivers/dri/i965/brw_context.c | 4 + src/mesa/drivers/dri/i965/brw_context.h | 23 +++ src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_draw.c | 16 +- src/mesa/drivers/dri/i965/brw_queryobj.c | 18 +- src/mesa/drivers/dri/i965/intel_extensions.c | 5 + src/mesa/drivers/dri/i965/intel_reg.h | 23 +++ 9 files changed, 240 insertions(+), 12 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_conditional_render.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 1ae93e1d5f3..a24c20aada4 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -18,6 +18,7 @@ i965_FILES = \ brw_clip_unfilled.c \ brw_clip_util.c \ brw_compute.c \ + brw_conditional_render.c \ brw_context.c \ brw_context.h \ brw_cs.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c new file mode 100644 index 00000000000..6d37c3b6928 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c @@ -0,0 +1,161 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Neil Roberts + */ + +/** @file brw_conditional_render.c + * + * Support for conditional rendering based on query objects + * (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gen7+. + */ + +#include "main/imports.h" +#include "main/condrender.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +set_predicate_enable(struct brw_context *brw, + bool value) +{ + if (value) + brw->predicate.state = BRW_PREDICATE_STATE_RENDER; + else + brw->predicate.state = BRW_PREDICATE_STATE_DONT_RENDER; +} + +static void +set_predicate_for_result(struct brw_context *brw, + struct brw_query_object *query, + bool inverted) +{ + int load_op; + + assert(query->bo != NULL); + + brw_load_register_mem64(brw, + MI_PREDICATE_SRC0, + query->bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, /* write domain */ + 0 /* offset */); + brw_load_register_mem64(brw, + MI_PREDICATE_SRC1, + query->bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, /* write domain */ + 8 /* offset */); + + if (inverted) + load_op = MI_PREDICATE_LOADOP_LOAD; + else + load_op = MI_PREDICATE_LOADOP_LOADINV; + + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + load_op | + MI_PREDICATE_COMBINEOP_SET | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL); + ADVANCE_BATCH(); + + brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; +} + +static void +brw_begin_conditional_render(struct gl_context *ctx, + struct gl_query_object *q, + GLenum mode) +{ + struct brw_context *brw = brw_context(ctx); + 
struct brw_query_object *query = (struct brw_query_object *) q; + bool inverted; + + if (!brw->predicate.supported) + return; + + switch (mode) { + case GL_QUERY_WAIT: + case GL_QUERY_NO_WAIT: + case GL_QUERY_BY_REGION_WAIT: + case GL_QUERY_BY_REGION_NO_WAIT: + inverted = false; + break; + case GL_QUERY_WAIT_INVERTED: + case GL_QUERY_NO_WAIT_INVERTED: + case GL_QUERY_BY_REGION_WAIT_INVERTED: + case GL_QUERY_BY_REGION_NO_WAIT_INVERTED: + inverted = true; + break; + default: + unreachable("Unexpected conditional render mode"); + } + + /* If there are already samples from a BLT operation or if the query object + * is ready then we can avoid looking at the values in the buffer and just + * decide whether to draw using the CPU without stalling. + */ + if (query->Base.Result || query->Base.Ready) + set_predicate_enable(brw, (query->Base.Result != 0) ^ inverted); + else + set_predicate_for_result(brw, query, inverted); +} + +static void +brw_end_conditional_render(struct gl_context *ctx, + struct gl_query_object *q) +{ + struct brw_context *brw = brw_context(ctx); + + /* When there is no longer a conditional render in progress it should + * always render. + */ + brw->predicate.state = BRW_PREDICATE_STATE_RENDER; +} + +void +brw_init_conditional_render_functions(struct dd_function_table *functions) +{ + functions->BeginConditionalRender = brw_begin_conditional_render; + functions->EndConditionalRender = brw_end_conditional_render; +} + +bool +brw_check_conditional_render(struct brw_context *brw) +{ + if (brw->predicate.supported) { + /* In some cases it is possible to determine that the primitives should + * be skipped without needing the predicate enable bit and still without + * stalling. 
+ */ + return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER; + } else if (brw->ctx.Query.CondRenderQuery) { + perf_debug("Conditional rendering is implemented in software and may " + "stall.\n"); + return _mesa_check_conditional_render(&brw->ctx); + } else { + return true; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index fd7420a6c6f..673529a28cd 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -289,6 +289,8 @@ brw_init_driver_functions(struct brw_context *brw, else gen4_init_queryobj_functions(functions); brw_init_compute_functions(functions); + if (brw->gen >= 7) + brw_init_conditional_render_functions(functions); functions->QuerySamplesForFormat = brw_query_samples_for_format; @@ -891,6 +893,8 @@ brwCreateContext(gl_api api, brw->gs.enabled = false; brw->sf.viewport_transform_enable = true; + brw->predicate.state = BRW_PREDICATE_STATE_RENDER; + ctx->VertexProgram._MaintainTnlProgram = true; ctx->FragmentProgram._MaintainTexEnvProgram = true; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c794fa43973..2dcc23c5fc6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -966,6 +966,20 @@ struct brw_stage_state uint32_t sampler_offset; }; +enum brw_predicate_state { + /* The first two states are used if we can determine whether to draw + * without having to look at the values in the query object buffer. This + * will happen if there is no conditional render in progress, if the query + * object is already completed or if something else has already added + * samples to the preliminary result such as via a BLT command. + */ + BRW_PREDICATE_STATE_RENDER, + BRW_PREDICATE_STATE_DONT_RENDER, + /* In this case whether to draw or not depends on the result of an + * MI_PREDICATE command so the predicate enable bit needs to be checked. 
+ */ + BRW_PREDICATE_STATE_USE_BIT +}; /** * brw_context is derived from gl_context. @@ -1401,6 +1415,11 @@ struct brw_context bool begin_emitted; } query; + struct { + enum brw_predicate_state state; + bool supported; + } predicate; + struct { /** A map from pipeline statistics counter IDs to MMIO addresses. */ const int *statistics_registers; @@ -1600,6 +1619,10 @@ void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *bo, int idx); void brw_store_register_mem64(struct brw_context *brw, drm_intel_bo *bo, uint32_t reg, int idx); +/** brw_conditional_render.c */ +void brw_init_conditional_render_functions(struct dd_function_table *functions); +bool brw_check_conditional_render(struct brw_context *brw); + /** intel_batchbuffer.c */ void brw_load_register_mem(struct brw_context *brw, uint32_t reg, diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 83d7a3535e4..11cb3fa490b 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -51,6 +51,7 @@ # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15) # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15) # define GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE (1 << 10) +# define GEN7_3DPRIM_PREDICATE_ENABLE (1 << 8) /* DW1 */ # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 96e23697923..a7164dbf7d8 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -178,6 +178,7 @@ static void brw_emit_prim(struct brw_context *brw, int verts_per_instance; int vertex_access_type; int indirect_flag; + int predicate_enable; DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), prim->start, prim->count); @@ -258,10 +259,14 @@ static void brw_emit_prim(struct brw_context *brw, indirect_flag = 0; } - if (brw->gen >= 7) { + if 
(brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) + predicate_enable = GEN7_3DPRIM_PREDICATE_ENABLE; + else + predicate_enable = 0; + BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag); + OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable); OUT_BATCH(hw_prim | vertex_access_type); } else { BEGIN_BATCH(6); @@ -561,12 +566,7 @@ void brw_draw_prims( struct gl_context *ctx, assert(unused_tfb_object == NULL); - if (ctx->Query.CondRenderQuery) { - perf_debug("Conditional rendering is implemented in software and may " - "stall. This should be fixed in the driver.\n"); - } - - if (!_mesa_check_conditional_render(ctx)) + if (!brw_check_conditional_render(brw)) return; /* Handle primitive restart if needed */ diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 667c9009304..aea4d9b77d3 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -66,10 +66,20 @@ brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx) void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx) { - brw_emit_pipe_control_write(brw, - PIPE_CONTROL_WRITE_DEPTH_COUNT - | PIPE_CONTROL_DEPTH_STALL, - query_bo, idx * sizeof(uint64_t), 0, 0); + uint32_t flags; + + flags = (PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_DEPTH_STALL); + + /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM + * command when loading the values into the predicate source registers for + * conditional rendering. 
+ */ + if (brw->predicate.supported) + flags |= PIPE_CONTROL_FLUSH_ENABLE; + + brw_emit_pipe_control_write(brw, flags, query_bo, + idx * sizeof(uint64_t), 0, 0); } /** diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index c3eee31d017..cafb77455d7 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -320,6 +320,8 @@ intelInitExtensions(struct gl_context *ctx) } } + brw->predicate.supported = false; + if (brw->gen >= 7) { ctx->Extensions.ARB_conservative_depth = true; ctx->Extensions.ARB_derivative_control = true; @@ -333,6 +335,9 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_transform_feedback2 = true; ctx->Extensions.ARB_transform_feedback3 = true; ctx->Extensions.ARB_transform_feedback_instanced = true; + + if (brw->intelScreen->cmd_parser_version >= 2) + brw->predicate.supported = true; } /* Only enable this in core profile because other parts of Mesa behave diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h index 488fb5b98f8..bd14e189da3 100644 --- a/src/mesa/drivers/dri/i965/intel_reg.h +++ b/src/mesa/drivers/dri/i965/intel_reg.h @@ -48,6 +48,20 @@ #define GEN7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23)) # define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22) +/* Manipulate the predicate bit based on some register values. 
Only on Gen7+ */ +#define GEN7_MI_PREDICATE (CMD_MI | (0xC << 23)) +# define MI_PREDICATE_LOADOP_KEEP (0 << 6) +# define MI_PREDICATE_LOADOP_LOAD (2 << 6) +# define MI_PREDICATE_LOADOP_LOADINV (3 << 6) +# define MI_PREDICATE_COMBINEOP_SET (0 << 3) +# define MI_PREDICATE_COMBINEOP_AND (1 << 3) +# define MI_PREDICATE_COMBINEOP_OR (2 << 3) +# define MI_PREDICATE_COMBINEOP_XOR (3 << 3) +# define MI_PREDICATE_COMPAREOP_TRUE (0 << 0) +# define MI_PREDICATE_COMPAREOP_FALSE (1 << 0) +# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL (2 << 0) +# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL (3 << 0) + /** @{ * * PIPE_CONTROL operation, a combination MI_FLUSH and register write with @@ -69,6 +83,7 @@ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1 << 10) /* GM45+ only */ #define PIPE_CONTROL_ISP_DIS (1 << 9) #define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) +#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */ /* GT */ #define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1 << 5) #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) @@ -147,3 +162,11 @@ # define GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1) # define GEN8_HIZ_PMA_MASK_BITS \ ((GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE) << 16) + +/* Predicate registers */ +#define MI_PREDICATE_SRC0 0x2400 +#define MI_PREDICATE_SRC1 0x2408 +#define MI_PREDICATE_DATA 0x2410 +#define MI_PREDICATE_RESULT 0x2418 +#define MI_PREDICATE_RESULT_1 0x241C +#define MI_PREDICATE_RESULT_2 0x2214 From 95774ca258d216d42877f9a8da7e1bb4212a6500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Mon, 11 May 2015 14:50:19 +0300 Subject: [PATCH 031/834] nir: fix sampler lowering pass for arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes bugs with special cases where we have arrays of structures containing samplers or arrays of samplers. I've verified that patch results in calculating same index value as returned by _mesa_get_sampler_uniform_value for IR. 
Patch makes following ES3 conformance test pass: ES3-CTS.shaders.struct.uniform.sampler_array_fragment v2: remove unnecessary comment (Topi) simplify changes and the overall code (Jason) Signed-off-by: Tapani Pälli Reviewed-by: Jason Ekstrand Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90114 --- src/glsl/nir/nir_lower_samplers.cpp | 31 +++++++++++------------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 8fc5909b711..7a0b0a09ffe 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -70,19 +70,22 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr case nir_deref_type_array: { nir_deref_array *deref_array = nir_deref_as_array(deref->child); + assert(deref_array->deref_array_type != nir_deref_array_type_wildcard); + + if (deref_array->deref.child) { + ralloc_asprintf_append(&name, "[%u]", + deref_array->deref_array_type == nir_deref_array_type_direct ? + deref_array->base_offset : 0); + } else { + assert(deref->child->type->base_type == GLSL_TYPE_SAMPLER); + instr->sampler_index = deref_array->base_offset; + } + /* XXX: We're assuming here that the indirect is the last array * thing we have. This should be ok for now as we don't support * arrays_of_arrays yet. 
*/ - - instr->sampler_index *= glsl_get_length(deref->type); - switch (deref_array->deref_array_type) { - case nir_deref_array_type_direct: - instr->sampler_index += deref_array->base_offset; - if (deref_array->deref.child) - ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset); - break; - case nir_deref_array_type_indirect: { + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { /* First, we have to resize the array of texture sources */ nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, instr->num_srcs + 1); @@ -106,16 +109,6 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr &deref_array->indirect); instr->sampler_array_size = glsl_get_length(deref->type); - - if (deref_array->deref.child) - ralloc_strcat(&name, "[0]"); - break; - } - - case nir_deref_array_type_wildcard: - unreachable("Cannot copy samplers"); - default: - unreachable("Invalid deref array type"); } break; } From cbf204069d00e99055a539e5f79566e2021fa8f4 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 6 May 2015 17:37:12 +0300 Subject: [PATCH 032/834] i965: Document brw_mask_reg(). Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_reg.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index c03a8aed796..81a932063a7 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -778,7 +778,11 @@ brw_flag_reg(int reg, int subreg) BRW_ARF_FLAG + reg, subreg); } - +/** + * Return the mask register present in Gen4-5, or the related register present + * in Gen7.5 and later hardware referred to as "channel enable" register in + * the documentation. 
+ */ static inline struct brw_reg brw_mask_reg(unsigned subnr) { From 0db663503ea86579d3352fe83d428d573a8d2b03 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 7 May 2015 19:33:57 +0300 Subject: [PATCH 033/834] i965: Don't forget the force_sechalf flag in lower_load_payload(). Regression from commit 41868bb6824c6106a55c8442006c1e2215abf567. Fixes a bunch of ARB_shader_image_load_store tests. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 3414d92efde..08664cf328c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3507,6 +3507,7 @@ fs_visitor::lower_load_payload() fs_inst *mov = MOV(retype(dst, inst->src[i].type), inst->src[i]); mov->force_writemask_all = inst->force_writemask_all; + mov->force_sechalf = inst->force_sechalf; inst->insert_before(block, mov); } dst = offset(dst, 1); From 4171ef371a25fccf9e96c0908a4848ea79dcfef2 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 22 Apr 2015 14:46:17 +0300 Subject: [PATCH 034/834] i965/fs: Fix offset() for registers with zero stride. stride == 0 implies that the register has one channel per vector component. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_ir_fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index f3dfe790f34..a98f4e3e142 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -131,14 +131,15 @@ horiz_offset(fs_reg reg, unsigned delta) static inline fs_reg offset(fs_reg reg, unsigned delta) { - assert(reg.stride > 0); switch (reg.file) { case BAD_FILE: break; case GRF: case MRF: case ATTR: - return byte_offset(reg, delta * reg.width * reg.stride * type_sz(reg.type)); + return byte_offset(reg, + delta * MAX2(reg.width * reg.stride, 1) * + type_sz(reg.type)); case UNIFORM: reg.reg_offset += delta; break; From ee1a8b5a8cc118b49b47c7074fa18cf1b4820885 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 30 Apr 2015 19:29:54 +0300 Subject: [PATCH 035/834] i965/fs: Have component() set the register stride to zero. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_ir_fs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index a98f4e3e142..7ac7ff81d20 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -156,6 +156,7 @@ component(fs_reg reg, unsigned idx) assert(idx < reg.width); reg.subreg_offset = idx * type_sz(reg.type); reg.width = 1; + reg.stride = 0; return reg; } From d9e930997f1addafe37e10ddc0f56d0684be0086 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 5 May 2015 22:58:39 +0300 Subject: [PATCH 036/834] nir: Define image load, store and atomic intrinsics. v2: Undefine coordinate components not applicable to the target. 
Reviewed-by: Connor Abbott Reviewed-by: Kenneth Graunke --- src/glsl/nir/nir_intrinsics.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 8e28765c13a..2a0df2be025 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -89,6 +89,33 @@ ATOMIC(inc, 0) ATOMIC(dec, 0) ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE) +/* + * Image load, store and atomic intrinsics. + * + * All image intrinsics take an image target passed as a nir_variable. Image + * variables contain a number of memory and layout qualifiers that influence + * the semantics of the intrinsic. + * + * All image intrinsics take a four-coordinate vector and a sample index as + * first two sources, determining the location within the image that will be + * accessed by the intrinsic. Components not applicable to the image target + * in use are undefined. Image store takes an additional four-component + * argument with the value to be written, and image atomic operations take + * either one or two additional scalar arguments with the same meaning as in + * the ARB_shader_image_load_store specification. 
+ */ +INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0) +INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0) + #define SYSTEM_VALUE(name, components) \ INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) From f1269a3e013e23135b2482e4f137d69ed6cc1734 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 5 May 2015 23:00:51 +0300 Subject: [PATCH 037/834] nir: Add memory barrier intrinsic. Reviewed-by: Connor Abbott Reviewed-by: Kenneth Graunke --- src/glsl/nir/nir_intrinsics.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 2a0df2be025..10192c5315c 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -68,6 +68,13 @@ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, #define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0) BARRIER(discard) + +/* + * Memory barrier with semantics analogous to the memoryBarrier() GLSL + * intrinsic. + */ +BARRIER(memory_barrier) + /** A conditional discard, with a single boolean source. */ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0) From 6de78e6b0c8aa4bd6bdd89e3ca33c2ccb9a5ac3d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 5 May 2015 23:02:05 +0300 Subject: [PATCH 038/834] nir: Fix indexing of atomic counter arrays with a constant value. 
Reviewed-by: Connor Abbott Reviewed-by: Kenneth Graunke --- src/glsl/nir/nir_lower_atomics.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c index e82df016969..f6f89020f78 100644 --- a/src/glsl/nir/nir_lower_atomics.c +++ b/src/glsl/nir/nir_lower_atomics.c @@ -78,7 +78,8 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) nir_deref_as_array(instr->variables[0]->deref.child); assert(deref_array->deref.child == NULL); - offset_const->value.u[0] += deref_array->base_offset; + offset_const->value.u[0] += + deref_array->base_offset * ATOMIC_COUNTER_SIZE; if (deref_array->deref_array_type == nir_deref_array_type_indirect) { nir_load_const_instr *atomic_counter_size = From f8f8b318476cb40650b0bc2597b21978fc456d78 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 5 May 2015 23:04:46 +0300 Subject: [PATCH 039/834] nir: Translate image load, store and atomic intrinsics from GLSL IR. v2: Undefine coordinate components not applicable to the target. 
Reviewed-by: Connor Abbott Reviewed-by: Kenneth Graunke --- src/glsl/nir/glsl_to_nir.cpp | 126 ++++++++++++++++++++++++++++++++--- 1 file changed, 115 insertions(+), 11 deletions(-) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 7a20e1a36f5..ff9c9b4a9e7 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -614,27 +614,131 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_atomic_counter_inc_var; } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) { op = nir_intrinsic_atomic_counter_dec_var; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_load") == 0) { + op = nir_intrinsic_image_load; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_store") == 0) { + op = nir_intrinsic_image_store; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_add") == 0) { + op = nir_intrinsic_image_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_min") == 0) { + op = nir_intrinsic_image_atomic_min; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_max") == 0) { + op = nir_intrinsic_image_atomic_max; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_and") == 0) { + op = nir_intrinsic_image_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_or") == 0) { + op = nir_intrinsic_image_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_xor") == 0) { + op = nir_intrinsic_image_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_exchange") == 0) { + op = nir_intrinsic_image_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) { + op = nir_intrinsic_image_atomic_comp_swap; } else { unreachable("not reached"); } nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); - ir_dereference *param = - (ir_dereference *) ir->actual_parameters.get_head(); - instr->variables[0] = 
evaluate_deref(&instr->instr, param); - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + + switch (op) { + case nir_intrinsic_atomic_counter_read_var: + case nir_intrinsic_atomic_counter_inc_var: + case nir_intrinsic_atomic_counter_dec_var: { + ir_dereference *param = + (ir_dereference *) ir->actual_parameters.get_head(); + instr->variables[0] = evaluate_deref(&instr->instr, param); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + break; + } + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: { + nir_ssa_undef_instr *instr_undef = + nir_ssa_undef_instr_create(shader, 1); + nir_instr_insert_after_cf_list(this->cf_node_list, + &instr_undef->instr); + + /* Set the image variable dereference. */ + exec_node *param = ir->actual_parameters.get_head(); + ir_dereference *image = (ir_dereference *)param; + const glsl_type *type = + image->variable_referenced()->type->without_array(); + + instr->variables[0] = evaluate_deref(&instr->instr, image); + param = param->get_next(); + + /* Set the address argument, extending the coordinate vector to four + * components. 
+ */ + const nir_src src_addr = evaluate_rvalue((ir_dereference *)param); + nir_alu_instr *instr_addr = nir_alu_instr_create(shader, nir_op_vec4); + nir_ssa_dest_init(&instr_addr->instr, &instr_addr->dest.dest, 4, NULL); + + for (int i = 0; i < 4; i++) { + if (i < type->coordinate_components()) { + instr_addr->src[i].src = src_addr; + instr_addr->src[i].swizzle[0] = i; + } else { + instr_addr->src[i].src = nir_src_for_ssa(&instr_undef->def); + } + } + + nir_instr_insert_after_cf_list(cf_node_list, &instr_addr->instr); + instr->src[0] = nir_src_for_ssa(&instr_addr->dest.dest.ssa); + param = param->get_next(); + + /* Set the sample argument, which is undefined for single-sample + * images. + */ + if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { + instr->src[1] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } else { + instr->src[1] = nir_src_for_ssa(&instr_undef->def); + } + + /* Set the intrinsic parameters. */ + if (!param->is_tail_sentinel()) { + instr->src[2] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } + + if (!param->is_tail_sentinel()) { + instr->src[3] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } + + /* Set the intrinsic destination. 
*/ + if (ir->return_deref) + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, NULL); + break; + } + default: + unreachable("not reached"); + } nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); - nir_intrinsic_instr *store_instr = - nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); - store_instr->num_components = 1; + if (ir->return_deref) { + nir_intrinsic_instr *store_instr = + nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); + store_instr->num_components = ir->return_deref->type->vector_elements; - store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref); - store_instr->src[0].is_ssa = true; - store_instr->src[0].ssa = &instr->dest.ssa; + store_instr->variables[0] = + evaluate_deref(&store_instr->instr, ir->return_deref); + store_instr->src[0] = nir_src_for_ssa(&instr->dest.ssa); - nir_instr_insert_after_cf_list(this->cf_node_list, &store_instr->instr); + nir_instr_insert_after_cf_list(this->cf_node_list, + &store_instr->instr); + } return; } From d91d6b3f03f36d4cfef5e9aacac1534f12372c9f Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 5 May 2015 23:10:56 +0300 Subject: [PATCH 040/834] nir: Translate memory barrier intrinsics from GLSL IR. 
Reviewed-by: Connor Abbott Reviewed-by: Kenneth Graunke --- src/glsl/nir/glsl_to_nir.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index ff9c9b4a9e7..af758ceb020 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -634,6 +634,8 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_image_atomic_exchange; } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) { op = nir_intrinsic_image_atomic_comp_swap; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) { + op = nir_intrinsic_memory_barrier; } else { unreachable("not reached"); } @@ -721,6 +723,8 @@ nir_visitor::visit(ir_call *ir) ir->return_deref->type->vector_elements, NULL); break; } + case nir_intrinsic_memory_barrier: + break; default: unreachable("not reached"); } From 2232b929fd9ca6f00c8dab9dc45c386986be922d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 9 May 2015 14:47:38 +0300 Subject: [PATCH 041/834] clover: Refactor event::trigger and ::abort to prevent deadlock and reentrancy issues. Refactor ::trigger and ::abort to split out the operations that access concurrently modified data members and require locking from the recursive and possibly re-entrant part of these methods. This will avoid some deadlock situations when locking is implemented. 
Tested-by: Tom Stellard CC: 10.5 --- .../state_trackers/clover/core/event.cpp | 43 +++++++++++++------ .../state_trackers/clover/core/event.hpp | 3 ++ 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp index 55793031ad1..d03e0b42ba3 100644 --- a/src/gallium/state_trackers/clover/core/event.cpp +++ b/src/gallium/state_trackers/clover/core/event.cpp @@ -36,28 +36,47 @@ event::event(clover::context &ctx, const ref_vector &deps, event::~event() { } +std::vector> +event::trigger_self() { + std::vector> evs; + + if (!--wait_count) + std::swap(_chain, evs); + + return evs; +} + void event::trigger() { - if (!--wait_count) { - cv.notify_all(); - action_ok(*this); + auto evs = trigger_self(); - while (!_chain.empty()) { - _chain.back()().trigger(); - _chain.pop_back(); - } + if (signalled()) { + action_ok(*this); + cv.notify_all(); } + + for (event &ev : evs) + ev.trigger(); +} + +std::vector> +event::abort_self(cl_int status) { + std::vector> evs; + + _status = status; + std::swap(_chain, evs); + + return evs; } void event::abort(cl_int status) { - _status = status; + auto evs = abort_self(status); + action_fail(*this); - while (!_chain.empty()) { - _chain.back()().abort(status); - _chain.pop_back(); - } + for (event &ev : evs) + ev.abort(status); } bool diff --git a/src/gallium/state_trackers/clover/core/event.hpp b/src/gallium/state_trackers/clover/core/event.hpp index 0914842ad1b..f638c5bcf15 100644 --- a/src/gallium/state_trackers/clover/core/event.hpp +++ b/src/gallium/state_trackers/clover/core/event.hpp @@ -84,6 +84,9 @@ namespace clover { std::vector> deps; private: + std::vector> trigger_self(); + std::vector> abort_self(cl_int status); + unsigned wait_count; action action_ok; action action_fail; From 4022a468b2976c65e0d2afe9c9ac5804729e8641 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 9 May 2015 16:22:33 +0300 Subject: [PATCH 042/834] 
clover: Wrap event::_status in a method to prevent unlocked access. Tested-by: Tom Stellard CC: 10.5 --- src/gallium/state_trackers/clover/core/event.cpp | 15 ++++++++++----- src/gallium/state_trackers/clover/core/event.hpp | 4 ++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp index d03e0b42ba3..969d19a54ae 100644 --- a/src/gallium/state_trackers/clover/core/event.cpp +++ b/src/gallium/state_trackers/clover/core/event.cpp @@ -27,7 +27,7 @@ using namespace clover; event::event(clover::context &ctx, const ref_vector &deps, action action_ok, action action_fail) : - context(ctx), _status(0), wait_count(1), + context(ctx), wait_count(1), _status(0), action_ok(action_ok), action_fail(action_fail) { for (auto &ev : deps) ev.chain(*this); @@ -84,6 +84,11 @@ event::signalled() const { return !wait_count; } +cl_int +event::status() const { + return _status; +} + void event::chain(event &ev) { if (wait_count) { @@ -122,8 +127,8 @@ cl_int hard_event::status() const { pipe_screen *screen = queue()->device().pipe; - if (_status < 0) - return _status; + if (event::status() < 0) + return event::status(); else if (!_fence) return CL_QUEUED; @@ -213,8 +218,8 @@ soft_event::soft_event(clover::context &ctx, const ref_vector &deps, cl_int soft_event::status() const { - if (_status < 0) - return _status; + if (event::status() < 0) + return event::status(); else if (!signalled() || any_of([](const event &ev) { diff --git a/src/gallium/state_trackers/clover/core/event.hpp b/src/gallium/state_trackers/clover/core/event.hpp index f638c5bcf15..6469e483c73 100644 --- a/src/gallium/state_trackers/clover/core/event.hpp +++ b/src/gallium/state_trackers/clover/core/event.hpp @@ -66,7 +66,7 @@ namespace clover { void abort(cl_int status); bool signalled() const; - virtual cl_int status() const = 0; + virtual cl_int status() const; virtual command_queue *queue() const = 0; virtual 
cl_command_type command() const = 0; virtual void wait() const; @@ -80,7 +80,6 @@ namespace clover { protected: void chain(event &ev); - cl_int _status; std::vector> deps; private: @@ -88,6 +87,7 @@ namespace clover { std::vector> abort_self(cl_int status); unsigned wait_count; + cl_int _status; action action_ok; action action_fail; std::vector> _chain; From a533d4edf1ea346dd9e343c71b2cd500fa550ef8 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 9 May 2015 16:01:23 +0300 Subject: [PATCH 043/834] clover: Implement locking of the wait_count, _chain and _status members of event. Tested-by: Tom Stellard CC: 10.5 --- src/gallium/state_trackers/clover/core/event.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp index 969d19a54ae..e1f9de07f83 100644 --- a/src/gallium/state_trackers/clover/core/event.cpp +++ b/src/gallium/state_trackers/clover/core/event.cpp @@ -38,6 +38,7 @@ event::~event() { std::vector> event::trigger_self() { + std::lock_guard lock(mutex); std::vector> evs; if (!--wait_count) @@ -61,6 +62,7 @@ event::trigger() { std::vector> event::abort_self(cl_int status) { + std::lock_guard lock(mutex); std::vector> evs; _status = status; @@ -81,16 +83,22 @@ event::abort(cl_int status) { bool event::signalled() const { + std::lock_guard lock(mutex); return !wait_count; } cl_int event::status() const { + std::lock_guard lock(mutex); return _status; } void event::chain(event &ev) { + std::unique_lock lock(mutex, std::defer_lock); + std::unique_lock lock_ev(ev.mutex, std::defer_lock); + std::lock(lock, lock_ev); + if (wait_count) { ev.wait_count++; _chain.push_back(ev); From cacd0e290a7f510fe9cd78fde3156cd42f35a8b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 29 Apr 2015 15:04:34 +0200 Subject: [PATCH 044/834] gallium: add an interface for querying a device reset status Reviewed-by: Kenneth Graunke --- 
src/gallium/include/pipe/p_context.h | 4 ++++ src/gallium/include/pipe/p_defines.h | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index adff67a88c8..2d9f6d35dc9 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -562,6 +562,10 @@ struct pipe_context { void (*invalidate_resource)(struct pipe_context *ctx, struct pipe_resource *resource); + /** + * Return information about unexpected device resets. + */ + enum pipe_reset_status (*get_device_reset_status)(struct pipe_context *ctx); }; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 8a16fde22e7..ba1a4c62f44 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -476,6 +476,19 @@ enum pipe_flush_flags #define PIPE_TIMEOUT_INFINITE 0xffffffffffffffffull + +/** + * Device reset status. + */ +enum pipe_reset_status +{ + PIPE_NO_RESET = 0, + PIPE_GUILTY_CONTEXT_RESET = 1, + PIPE_INNOCENT_CONTEXT_RESET = 2, + PIPE_UNKNOWN_CONTEXT_RESET = 3 +}; + + /** * Implementation capabilities/limits which are queried through * pipe_screen::get_param() From 79ffc08ae8641e5b22d8cd4d9edc7ca7f0cf3aa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 29 Apr 2015 15:44:55 +0200 Subject: [PATCH 045/834] gallium: add PIPE_CAP_DEVICE_RESET_STATUS_QUERY Reviewed-by: Kenneth Graunke --- src/gallium/docs/source/screen.rst | 2 ++ src/gallium/drivers/freedreno/freedreno_screen.c | 1 + src/gallium/drivers/i915/i915_screen.c | 1 + src/gallium/drivers/ilo/ilo_screen.c | 1 + src/gallium/drivers/llvmpipe/lp_screen.c | 1 + src/gallium/drivers/nouveau/nv30/nv30_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/r300/r300_screen.c | 1 + src/gallium/drivers/r600/r600_pipe.c | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 
1 + src/gallium/drivers/softpipe/sp_screen.c | 1 + src/gallium/drivers/svga/svga_screen.c | 1 + src/gallium/drivers/vc4/vc4_screen.c | 1 + src/gallium/include/pipe/p_defines.h | 1 + 15 files changed, 16 insertions(+) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 68931cf3519..416ef2dada6 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -252,6 +252,8 @@ The integer capabilities: existing user memory into the device address space for direct device access. The create function is pipe_screen::resource_from_user_memory. The address and size must be page-aligned. +* ``PIPE_CAP_DEVICE_RESET_STATUS_QUERY``: + Whether pipe_context::get_device_reset_status is implemented. .. _pipe_capf: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 556c8ab18d4..f81ec80e045 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -220,6 +220,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; case PIPE_CAP_MAX_VIEWPORTS: diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 7216160bb22..03fecd1ca64 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -241,6 +241,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 918af0820de..b0fed730512 100644 --- 
a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -458,6 +458,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index f4ba596f358..509b9bce116 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -290,6 +290,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index eeb714864e2..025cad28042 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -161,6 +161,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 829dfbc13fa..f455a7f91b6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -209,6 +209,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */ case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; case PIPE_CAP_VENDOR_ID: diff --git 
a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 748c9e7c8b9..1ca997a4913 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -193,6 +193,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index a7b59d8bfbb..8e1d7102f0c 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -190,6 +190,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; /* SWTCL-only features. */ diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 21e5d42adc3..5a8eb068f00 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -332,6 +332,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_VERTEXID_NOBASE: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; /* Stream output. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index e68c30e8c7c..9d60ef1db38 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -293,6 +293,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_VERTEXID_NOBASE: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index d289e28a6f8..fc32c56b699 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -237,6 +237,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 0; case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index b75f0386449..549a89aed22 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -308,6 +308,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_UMA: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; } diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 84aae918326..134d644cb48 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -175,6 +175,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return 0; /* Stream output. 
*/ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index ba1a4c62f44..0eb8a713c95 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -594,6 +594,7 @@ enum pipe_cap PIPE_CAP_POLYGON_OFFSET_CLAMP = 113, PIPE_CAP_MULTISAMPLE_Z_RESOLVE = 114, PIPE_CAP_RESOURCE_FROM_USER_MEMORY = 115, + PIPE_CAP_DEVICE_RESET_STATUS_QUERY = 116, }; #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0) From a0ad18580335d2255d4e1bf222886418c8e2302e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 29 Apr 2015 15:05:19 +0200 Subject: [PATCH 046/834] st/mesa: implement GetGraphicsResetStatus Reviewed-by: Kenneth Graunke --- src/mesa/state_tracker/st_cb_flush.c | 35 +++++++++++++++++++++++++++- src/mesa/state_tracker/st_cb_flush.h | 3 ++- src/mesa/state_tracker/st_context.c | 7 +++--- src/mesa/state_tracker/st_context.h | 3 ++- 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index ca51eeee366..82affd2de3e 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -141,11 +141,44 @@ static void st_glFinish(struct gl_context *ctx) } -void st_init_flush_functions(struct dd_function_table *functions) +/** + * Query information about GPU resets observed by this context + * + * Called via \c dd_function_table::GetGraphicsResetStatus. 
+ */ +static GLenum +st_get_graphics_reset_status(struct gl_context *ctx) +{ + struct st_context *st = st_context(ctx); + enum pipe_reset_status status; + + status = st->pipe->get_device_reset_status(st->pipe); + + switch (status) { + case PIPE_NO_RESET: + return GL_NO_ERROR; + case PIPE_GUILTY_CONTEXT_RESET: + return GL_GUILTY_CONTEXT_RESET_ARB; + case PIPE_INNOCENT_CONTEXT_RESET: + return GL_INNOCENT_CONTEXT_RESET_ARB; + case PIPE_UNKNOWN_CONTEXT_RESET: + return GL_UNKNOWN_CONTEXT_RESET_ARB; + default: + assert(0); + return GL_NO_ERROR; + } +} + + +void st_init_flush_functions(struct pipe_screen *screen, + struct dd_function_table *functions) { functions->Flush = st_glFlush; functions->Finish = st_glFinish; + if (screen->get_param(screen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) + functions->GetGraphicsResetStatus = st_get_graphics_reset_status; + /* Windows opengl32.dll calls glFinish prior to every swapbuffers. * This is unnecessary and degrades performance. Luckily we have some * scope to work around this, as the externally-visible behaviour of diff --git a/src/mesa/state_tracker/st_cb_flush.h b/src/mesa/state_tracker/st_cb_flush.h index 84ffc63ae13..f92dcd56b64 100644 --- a/src/mesa/state_tracker/st_cb_flush.h +++ b/src/mesa/state_tracker/st_cb_flush.h @@ -37,7 +37,8 @@ struct pipe_fence_handle; struct st_context; extern void -st_init_flush_functions(struct dd_function_table *functions); +st_init_flush_functions(struct pipe_screen *screen, + struct dd_function_table *functions); extern void st_flush(struct st_context *st, diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index bfb9c8406bd..69e0f929db8 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -321,7 +321,7 @@ struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, struct st_context *st; memset(&funcs, 0, sizeof(funcs)); - st_init_driver_functions(&funcs); + st_init_driver_functions(pipe->screen, &funcs); 
ctx = _mesa_create_context(api, visual, shareCtx, &funcs); if (!ctx) { @@ -401,7 +401,8 @@ void st_destroy_context( struct st_context *st ) } -void st_init_driver_functions(struct dd_function_table *functions) +void st_init_driver_functions(struct pipe_screen *screen, + struct dd_function_table *functions) { _mesa_init_shader_object_functions(functions); _mesa_init_sampler_object_functions(functions); @@ -429,7 +430,7 @@ void st_init_driver_functions(struct dd_function_table *functions) st_init_readpixels_functions(functions); st_init_texture_functions(functions); st_init_texture_barrier_functions(functions); - st_init_flush_functions(functions); + st_init_flush_functions(screen, functions); st_init_string_functions(functions); st_init_viewport_functions(functions); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 8a9504bb7c1..dac5a4b9006 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -237,7 +237,8 @@ struct st_framebuffer }; -extern void st_init_driver_functions(struct dd_function_table *functions); +extern void st_init_driver_functions(struct pipe_screen *screen, + struct dd_function_table *functions); void st_invalidate_state(struct gl_context * ctx, GLuint new_state); From f1c42475a589531919194c95b97e7558b448eb5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 29 Apr 2015 17:52:02 +0200 Subject: [PATCH 047/834] st/dri: add support for create_context_robustness GLX and EGL extensions Reviewed-by: Kenneth Graunke --- src/gallium/include/state_tracker/st_api.h | 1 + src/gallium/state_trackers/dri/dri2.c | 23 ++++++++++++++++- src/gallium/state_trackers/dri/dri_context.c | 27 ++++++++++++++------ src/gallium/state_trackers/dri/dri_screen.h | 1 + 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h index 86fdc6988ab..ecf1c07fb98 100644 --- 
a/src/gallium/include/state_tracker/st_api.h +++ b/src/gallium/include/state_tracker/st_api.h @@ -89,6 +89,7 @@ enum st_api_feature #define ST_CONTEXT_FLAG_DEBUG (1 << 0) #define ST_CONTEXT_FLAG_FORWARD_COMPATIBLE (1 << 1) #define ST_CONTEXT_FLAG_ROBUST_ACCESS (1 << 2) +#define ST_CONTEXT_FLAG_RESET_NOTIFICATION_ENABLED (1 << 3) /** * Reasons that context creation might fail. diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index 8b6fe67dc91..792d5651698 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -1399,6 +1399,10 @@ static __DRI2fenceExtension dri2FenceExtension = { .server_wait_sync = dri2_server_wait_sync }; +static const __DRIrobustnessExtension dri2Robustness = { + .base = { __DRI2_ROBUSTNESS, 1 } +}; + /* * Backend function init_screen. */ @@ -1414,6 +1418,18 @@ static const __DRIextension *dri_screen_extensions[] = { NULL }; +static const __DRIextension *dri_robust_screen_extensions[] = { + &driTexBufferExtension.base, + &dri2FlushExtension.base, + &dri2ImageExtension.base, + &dri2RendererQueryExtension.base, + &dri2ConfigQueryExtension.base, + &dri2ThrottleExtension.base, + &dri2FenceExtension.base, + &dri2Robustness.base, + NULL +}; + /** * This is the driver specific part of the createNewScreen entry point. 
* @@ -1467,7 +1483,12 @@ dri2_init_screen(__DRIscreen * sPriv) } } - sPriv->extensions = dri_screen_extensions; + if (pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) { + sPriv->extensions = dri_robust_screen_extensions; + screen->has_reset_status_query = true; + } + else + sPriv->extensions = dri_screen_extensions; /* dri_init_screen_helper checks pscreen for us */ diff --git a/src/gallium/state_trackers/dri/dri_context.c b/src/gallium/state_trackers/dri/dri_context.c index 8ac81b7364b..9f11b15596c 100644 --- a/src/gallium/state_trackers/dri/dri_context.c +++ b/src/gallium/state_trackers/dri/dri_context.c @@ -56,6 +56,21 @@ dri_create_context(gl_api api, const struct gl_config * visual, struct st_context_iface *st_share = NULL; struct st_context_attribs attribs; enum st_context_error ctx_err = 0; + unsigned allowed_flags = __DRI_CTX_FLAG_DEBUG | + __DRI_CTX_FLAG_FORWARD_COMPATIBLE; + + if (screen->has_reset_status_query) + allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS; + + if (flags & ~allowed_flags) { + *error = __DRI_CTX_ERROR_UNKNOWN_FLAG; + goto fail; + } + + if (!screen->has_reset_status_query && notify_reset) { + *error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE; + goto fail; + } memset(&attribs, 0, sizeof(attribs)); switch (api) { @@ -83,15 +98,11 @@ dri_create_context(gl_api api, const struct gl_config * visual, if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) attribs.flags |= ST_CONTEXT_FLAG_DEBUG; - if (flags & ~(__DRI_CTX_FLAG_DEBUG | __DRI_CTX_FLAG_FORWARD_COMPATIBLE)) { - *error = __DRI_CTX_ERROR_UNKNOWN_FLAG; - goto fail; - } + if (flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) + attribs.flags |= ST_CONTEXT_FLAG_ROBUST_ACCESS; - if (notify_reset) { - *error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE; - goto fail; - } + if (notify_reset) + attribs.flags |= ST_CONTEXT_FLAG_RESET_NOTIFICATION_ENABLED; if (sharedContextPrivate) { st_share = ((struct dri_context *)sharedContextPrivate)->st; diff --git a/src/gallium/state_trackers/dri/dri_screen.h 
b/src/gallium/state_trackers/dri/dri_screen.h index bdab74f2802..173f4038cdb 100644 --- a/src/gallium/state_trackers/dri/dri_screen.h +++ b/src/gallium/state_trackers/dri/dri_screen.h @@ -82,6 +82,7 @@ struct dri_screen boolean d_depth_bits_last; boolean sd_depth_bits_last; boolean auto_fake_front; + boolean has_reset_status_query; enum pipe_texture_target target; /* hooks filled in by dri2 & drisw */ From 0ea1047d8c0636fa1f6bcbac56be329e19ada205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 29 Apr 2015 17:54:26 +0200 Subject: [PATCH 048/834] st/mesa: translate st_api robustness flags to gl_context flags Reviewed-by: Kenneth Graunke --- src/mesa/state_tracker/st_manager.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 840f76a1307..0376954f742 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -680,6 +680,10 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, if (attribs->flags & ST_CONTEXT_FLAG_FORWARD_COMPATIBLE) st->ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT; + if (attribs->flags & ST_CONTEXT_FLAG_ROBUST_ACCESS) + st->ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB; + if (attribs->flags & ST_CONTEXT_FLAG_RESET_NOTIFICATION_ENABLED) + st->ctx->Const.ResetStrategy = GL_LOSE_CONTEXT_ON_RESET_ARB; /* need to perform version check */ if (attribs->major > 1 || attribs->minor > 0) { From 71ba30f7788167c04d0968d286a387fce16afcce Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 12 May 2015 13:13:05 -0400 Subject: [PATCH 049/834] radeonsi: add new bonaire pci id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher Cc: mesa-stable@lists.freedesktop.org --- include/pci_ids/radeonsi_pci_ids.h | 1 + 1 file changed, 1 insertion(+) diff --git 
a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h index 571e8633ffb..cd5da99a6a6 100644 --- a/include/pci_ids/radeonsi_pci_ids.h +++ b/include/pci_ids/radeonsi_pci_ids.h @@ -85,6 +85,7 @@ CHIPSET(0x6651, BONAIRE_6651, BONAIRE) CHIPSET(0x6658, BONAIRE_6658, BONAIRE) CHIPSET(0x665C, BONAIRE_665C, BONAIRE) CHIPSET(0x665D, BONAIRE_665D, BONAIRE) +CHIPSET(0x665F, BONAIRE_665F, BONAIRE) CHIPSET(0x9830, KABINI_9830, KABINI) CHIPSET(0x9831, KABINI_9831, KABINI) From 380f7611b5d23b72684ce1eb848f956945e4c39b Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 24 Apr 2015 19:17:11 -0400 Subject: [PATCH 050/834] st/mesa: update stencil surface if it comes from texture Now that ARB_texture_stencil8 is supported, this might happen. Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie --- src/mesa/state_tracker/st_atom_framebuffer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index b195c55b347..ae883a2535e 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -134,7 +134,10 @@ update_framebuffer_state( struct st_context *st ) else { strb = st_renderbuffer(fb->Attachment[BUFFER_STENCIL].Renderbuffer); if (strb) { - assert(strb->surface); + if (strb->is_rtt) { + /* rendering to a GL texture, may have to update surface */ + st_update_renderbuffer_surface(st, strb); + } pipe_surface_reference(&framebuffer->zsbuf, strb->surface); update_framebuffer_size(framebuffer, strb->surface); } From d06ce2f1df54edd234b1abde37bba524ed599acb Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 10 May 2015 01:57:56 -0400 Subject: [PATCH 051/834] nvc0: switch mechanism for shader eviction to be a while loop This aligns it to work similarly to nv50. However there's no library code there, so the whole thing can be freed. Here we end up with an allocated node that's not attached to a specific program. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86792 Signed-off-by: Ilia Mirkin Cc: mesa-stable@lists.freedesktop.org --- src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index c156e918dc5..55896955ca2 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -683,11 +683,12 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); if (ret) { struct nouveau_heap *heap = screen->text_heap; - struct nouveau_heap *iter; - for (iter = heap; iter && iter->next != heap; iter = iter->next) { - struct nvc0_program *evict = iter->priv; - if (evict) - nouveau_heap_free(&evict->mem); + /* Note that the code library, which is allocated before anything else, + * does not have a priv pointer. We can stop once we hit it. + */ + while (heap->next && heap->next->priv) { + struct nvc0_program *evict = heap->next->priv; + nouveau_heap_free(&evict->mem); } debug_printf("WARNING: out of code space, evicting all shaders.\n"); ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); From c696a318ef1eb58f65fb867d5616bbefd1def31e Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Tue, 12 May 2015 18:58:17 -0400 Subject: [PATCH 052/834] nouveau: document nouveau_heap Signed-off-by: Ilia Mirkin --- src/gallium/drivers/nouveau/nouveau_heap.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/gallium/drivers/nouveau/nouveau_heap.h b/src/gallium/drivers/nouveau/nouveau_heap.h index d0b22844ad0..a3d64a65623 100644 --- a/src/gallium/drivers/nouveau/nouveau_heap.h +++ b/src/gallium/drivers/nouveau/nouveau_heap.h @@ -23,6 +23,26 @@ #ifndef __NOUVEAU_HEAP_H__ #define __NOUVEAU_HEAP_H__ +/* This datastructure represents a memory allocation heap. 
Fundamentally, this + * is a doubly-linked list with a few properties, and a usage convention. + * + * On initial allocation, there is a single node with the full size that's + * marked as not in-use. As allocations are made, blocks are taken off the end + * of that first node, and inserted right after it. If the first node doesn't + * have enough free space, we look for free space down in the rest of the + * list. This can happen if an allocation is made and then freed. + * + * The first node will remain with in_use == 0 even if the whole heap is + * exhausted. Another invariant is that there will never be two sequential + * in_use == 0 nodes. If a node is freed and it has one (or both) adjacent + * free nodes, they are merged into one, and the relevant heap entries are + * freed. + * + * The pattern to free the whole heap is to start with the first node and then + * just free the "next" node, until there is no next node. This should assure + * that at the end the first (and only) node is not in use and contains the + * full size of the heap. + */ struct nouveau_heap { struct nouveau_heap *prev; struct nouveau_heap *next; From 71fc52072b1ecf01764c37d64ad160fcbfe19e11 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 May 2015 09:59:59 +1000 Subject: [PATCH 053/834] i965/cs: drop explicit initialisers in C++ file gcc 4.4.7 really doesn't like them, and they aren't standard C++, they seem to be a gcc extension. 
Reviewed-by: Jordan Justen Signed-off-by: Dave Airlie --- src/mesa/drivers/dri/i965/brw_cs.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 70731e4a4ff..fc2d8576495 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -368,9 +368,11 @@ brw_upload_cs_state(struct brw_context *brw) extern "C" const struct brw_tracked_state brw_cs_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_CS_PROG_DATA, + /* explicit initialisers aren't valid C++, comment + * them for documentation purposes */ + /* .dirty = */{ + /* .mesa = */ 0, + /* .brw = */ BRW_NEW_CS_PROG_DATA, }, - .emit = brw_upload_cs_state + /* .emit = */ brw_upload_cs_state }; From 58715b72396133350c1549381553121f936a198e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Tue, 12 May 2015 14:24:08 +0300 Subject: [PATCH 054/834] i965/fs: set execution size to 8 with simd8 ddy instruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit dd5c825 changed the way how execution size for instructions get set. Previously it was based on destination register width, now it is set explicitly when emitting instructions. 
Signed-off-by: Tapani Pälli Reviewed-by: Francisco Jerez Reviewed-by: Matt Turner Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90258 --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index a99b7f75b26..b6b0d0523a0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -941,6 +941,7 @@ fs_generator::generate_ddy(enum opcode opcode, brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_16); if (unroll_to_simd8) { + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); if (negate_value) { brw_ADD(p, firsthalf(dst), firsthalf(src1), negate(firsthalf(src0))); From d247615e0d67a7c8eaeea3fece837229c8c9658c Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 31 Jan 2015 20:04:55 +0200 Subject: [PATCH 055/834] i965: Fix PBO cache coherency issue after _mesa_meta_pbo_GetTexSubImage(). This problem can easily be reproduced with a number of ARB_shader_image_load_store piglit tests, which use a buffer object as PBO for a pixel transfer operation and later on bind the same buffer to the pipeline as shader image -- The problem is not exclusive to images though, and is likely to affect other kinds of buffer objects that can be bound to the 3D pipeline, including vertex, index, uniform, atomic counter buffers, etc. 
CC: 10.5 Reviewed-by: Jason Ekstrand Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/intel_pixel_read.c | 24 +++++++++++++++++++- src/mesa/drivers/dri/i965/intel_tex_image.c | 9 +++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index d3ca38b6ecd..30380570d62 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -226,8 +226,30 @@ intelReadPixels(struct gl_context * ctx, if (_mesa_is_bufferobj(pack->BufferObj)) { if (_mesa_meta_pbo_GetTexSubImage(ctx, 2, NULL, x, y, 0, width, height, 1, - format, type, pixels, pack)) + format, type, pixels, pack)) { + /* _mesa_meta_pbo_GetTexSubImage() implements PBO transfers by + * binding the user-provided BO as a fake framebuffer and rendering + * to it. This breaks the invariant of the GL that nothing is able + * to render to a BO, causing nondeterministic corruption issues + * because the render cache is not coherent with a number of other + * caches that the BO could potentially be bound to afterwards. + * + * This could be solved in the same way that we guarantee texture + * coherency after a texture is attached to a framebuffer and + * rendered to, but that would involve checking *all* BOs bound to + * the pipeline for the case we need to emit a cache flush due to + * previous rendering to any of them -- Including vertex, index, + * uniform, atomic counter, shader image, transform feedback, + * indirect draw buffers, etc. + * + * That would increase the per-draw call overhead even though it's + * very unlikely that any of the BOs bound to the pipeline has been + * rendered to via a PBO at any point, so it seems better to just + * flush here unconditionally. 
+ */ + intel_batchbuffer_emit_mi_flush(brw); return; + } perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); } diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 7952ee5ad88..85d3d04ecb3 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -486,8 +486,15 @@ intel_get_tex_image(struct gl_context *ctx, if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0, texImage->Width, texImage->Height, texImage->Depth, format, type, - pixels, &ctx->Pack)) + pixels, &ctx->Pack)) { + /* Flush to guarantee coherency between the render cache and other + * caches the PBO could potentially be bound to after this point. + * See the related comment in intelReadPixels() for a more detailed + * explanation. + */ + intel_batchbuffer_emit_mi_flush(brw); return; + } perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); } From 915d808a5653653b5c7b5413c4f667db017239ec Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Wed, 13 May 2015 09:40:01 -0500 Subject: [PATCH 056/834] gallium/st + hgl: Build fixes for Haiku * No impact risk to any other platforms * Tracing printf needs stdio.h now due to child header change * Add missing #/src include directory for util/macros.h --- src/gallium/state_trackers/hgl/hgl.c | 2 ++ src/gallium/targets/haiku-softpipe/GalliumContext.cpp | 2 ++ src/hgl/SConscript | 1 + 3 files changed, 5 insertions(+) diff --git a/src/gallium/state_trackers/hgl/hgl.c b/src/gallium/state_trackers/hgl/hgl.c index b75dc26bc39..77f7c2256e5 100644 --- a/src/gallium/state_trackers/hgl/hgl.c +++ b/src/gallium/state_trackers/hgl/hgl.c @@ -10,6 +10,8 @@ #include "GLView.h" +#include <stdio.h> + #include "pipe/p_format.h" #include "util/u_atomic.h" #include "util/u_format.h" diff --git a/src/gallium/targets/haiku-softpipe/GalliumContext.cpp b/src/gallium/targets/haiku-softpipe/GalliumContext.cpp index f9d7dfc8734..b24aef7dd5d 100644 --- 
a/src/gallium/targets/haiku-softpipe/GalliumContext.cpp +++ b/src/gallium/targets/haiku-softpipe/GalliumContext.cpp @@ -10,6 +10,8 @@ #include "GalliumContext.h" +#include <stdio.h> + #include "GLView.h" #include "bitmap_wrapper.h" diff --git a/src/hgl/SConscript b/src/hgl/SConscript index 70db1494df8..71881f504c9 100644 --- a/src/hgl/SConscript +++ b/src/hgl/SConscript @@ -6,6 +6,7 @@ Import('*') env = env.Clone() env.Append(CPPPATH = [ + '#/src', '#/src/mapi', '#/src/mesa', '#/src/mesa/main', From d27b114eaf75b1a61cc9600eed1b9cde352d8409 Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Wed, 13 May 2015 14:22:00 -0400 Subject: [PATCH 057/834] glapi: Add extern "C" to glapi_priv.h * The Haiku glapi has a C++ wrapper around the dispatch code. Reviewed-by: Brian Paul --- src/mapi/glapi/glapi_priv.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mapi/glapi/glapi_priv.h b/src/mapi/glapi/glapi_priv.h index 50f710edc8a..337913acc71 100644 --- a/src/mapi/glapi/glapi_priv.h +++ b/src/mapi/glapi/glapi_priv.h @@ -49,6 +49,10 @@ typedef void *GLeglImageOES; #include "glapi/glapi.h" +#ifdef __cplusplus +extern "C" { +#endif + /* getproc */ extern void @@ -106,4 +110,8 @@ get_entrypoint_address(unsigned int functionOffset); #define MAX_EXTENSION_FUNCS 256 +#ifdef __cplusplus +} +#endif + #endif From cf71e7093c301a1d3dc3e19d88d44a043ccadc7d Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Wed, 13 May 2015 14:22:57 -0400 Subject: [PATCH 058/834] glapi/hgl: Drop extern "C" as it was added to glapi Reviewed-by: Brian Paul --- src/hgl/GLDispatcher.cpp | 5 +++-- src/hgl/GLDispatcher.h | 4 +--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/hgl/GLDispatcher.cpp b/src/hgl/GLDispatcher.cpp index 46b91d57c49..a1e9053617c 100644 --- a/src/hgl/GLDispatcher.cpp +++ b/src/hgl/GLDispatcher.cpp @@ -1,6 +1,6 @@ /* * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000-2012 Haiku, Inc. All Rights Reserved. 
+ * Copyright 2000-2015 Haiku, Inc. All Rights Reserved. * Distributed under the terms of the MIT License. * * Authors: @@ -10,10 +10,11 @@ */ -extern "C" { #include "glapi/glapi.h" #include "glapi/glapi_priv.h" + +extern "C" { /* * NOTE: this file portion implements C-based dispatch of the OpenGL entrypoints * (glAccum, glBegin, etc). diff --git a/src/hgl/GLDispatcher.h b/src/hgl/GLDispatcher.h index 44bca8ce586..7ee095d917b 100644 --- a/src/hgl/GLDispatcher.h +++ b/src/hgl/GLDispatcher.h @@ -1,6 +1,6 @@ /* * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000-2012 Haiku, Inc. All Rights Reserved. + * Copyright 2000-2015 Haiku, Inc. All Rights Reserved. * Distributed under the terms of the MIT License. * * Authors: @@ -17,9 +17,7 @@ #include "glheader.h" -extern "C" { #include "glapi/glapi.h" -} class BGLDispatcher From 2712f70d57a0df25db6371496eb567564a588e13 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 12 May 2015 17:27:59 +0200 Subject: [PATCH 059/834] gallium/util: fix blitter sampler view target initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was missing, and drivers relying on the target in the view could get into quite some trouble. 
Signed-off-by: Roland Scheidegger Reviewed-by: Dave Airlie Reviewed-by: Marek Olšák --- src/gallium/auxiliary/util/u_blitter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 9d087fe8a66..24a5b93e199 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -1306,6 +1306,7 @@ void util_blitter_default_src_texture(struct pipe_sampler_view *src_templ, unsigned srclevel) { memset(src_templ, 0, sizeof(*src_templ)); + src_templ->target = src->target; src_templ->format = util_format_linear(src->format); src_templ->u.tex.first_level = srclevel; src_templ->u.tex.last_level = srclevel; From e6c66f4fb060cf2566d2b5e091b76a098566344d Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Wed, 13 May 2015 22:16:44 +0200 Subject: [PATCH 060/834] llvmpipe: enable ARB_texture_view All the functionality was pretty much there, just not tested. Trivially fix up the missing pieces (take target info from view not resource), and add some missing bits for cubes. Also add some minimal debug validation to detect uninitialized target values in the view... 49 new piglits, 47 pass, 2 fail (both related to fake multisampling, not texture_view itself). No other piglit changes. v2: move sampler view validation to sampler view creation, update docs. 
Reviewed-by: Brian Paul --- docs/GL3.txt | 2 +- docs/relnotes/10.6.0.html | 1 + src/gallium/auxiliary/gallivm/lp_bld_sample.c | 2 +- src/gallium/drivers/llvmpipe/lp_screen.c | 3 +- src/gallium/drivers/llvmpipe/lp_setup.c | 10 +++-- .../drivers/llvmpipe/lp_state_sampler.c | 40 +++++++++++++++++-- 6 files changed, 47 insertions(+), 11 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 5590fea9f25..32b7809d10c 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -169,7 +169,7 @@ GL 4.3, GLSL 4.30: GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe) GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30) GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample) - GL_ARB_texture_view DONE (i965, nv50, nvc0) + GL_ARB_texture_view DONE (i965, nv50, nvc0, llvmpipe) GL_ARB_vertex_attrib_binding DONE (all drivers) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index b7cd486f529..3f69f986ccd 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -56,6 +56,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_pipeline_statistics_query on i965, nv50, nvc0, r600, radeonsi, softpipe
  • GL_ARB_program_interface_query (all drivers)
  • GL_ARB_texture_stencil8 on nv50, nvc0, r600, radeonsi, softpipe
  • +GL_ARB_texture_view on llvmpipe
  • GL_ARB_uniform_buffer_object on freedreno
  • GL_ARB_vertex_attrib_64bit on nvc0, softpipe
  • GL_ARB_viewport_array, GL_AMD_vertex_shader_viewport_index on i965/gen6
  • diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 5b220450bf3..4befb3a1c80 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -113,7 +113,7 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state, state->swizzle_b = view->swizzle_b; state->swizzle_a = view->swizzle_a; - state->target = texture->target; + state->target = view->target; state->pot_width = util_is_power_of_two(texture->width0); state->pot_height = util_is_power_of_two(texture->height0); state->pot_depth = util_is_power_of_two(texture->depth0); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 509b9bce116..09ac9af50ec 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -258,8 +258,9 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: return 1; case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: - case PIPE_CAP_SAMPLER_VIEW_TARGET: return 0; + case PIPE_CAP_SAMPLER_VIEW_TARGET: + return 1; case PIPE_CAP_FAKE_SW_MSAA: return 1; case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 96cc77c250c..56292c68c5f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -854,9 +854,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->img_stride[j] = lp_tex->img_stride[j]; } - if (res->target == PIPE_TEXTURE_1D_ARRAY || - res->target == PIPE_TEXTURE_2D_ARRAY || - res->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (view->target == PIPE_TEXTURE_1D_ARRAY || + view->target == PIPE_TEXTURE_2D_ARRAY || + view->target == PIPE_TEXTURE_CUBE || + view->target == PIPE_TEXTURE_CUBE_ARRAY) { /* * For array textures, we don't have first_layer, instead * adjust last_layer 
(stored as depth) plus the mip level offsets @@ -868,7 +869,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->mip_offsets[j] += view->u.tex.first_layer * lp_tex->img_stride[j]; } - if (res->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (view->target == PIPE_TEXTURE_CUBE || + view->target == PIPE_TEXTURE_CUBE_ARRAY) { assert(jit_tex->depth % 6 == 0); } assert(view->u.tex.first_layer <= view->u.tex.last_layer); diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 21da6290574..b205f02fdba 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -170,6 +170,36 @@ llvmpipe_create_sampler_view(struct pipe_context *pipe, view->texture = NULL; pipe_resource_reference(&view->texture, texture); view->context = pipe; + +#ifdef DEBUG + /* + * This is possibly too lenient, but the primary reason is just + * to catch state trackers which forget to initialize this, so + * it only catches clearly impossible view targets. 
+ */ + if (view->target != texture->target) { + if (view->target == PIPE_TEXTURE_1D) + assert(texture->target == PIPE_TEXTURE_1D_ARRAY); + else if (view->target == PIPE_TEXTURE_1D_ARRAY) + assert(texture->target == PIPE_TEXTURE_1D); + else if (view->target == PIPE_TEXTURE_2D) + assert(texture->target == PIPE_TEXTURE_2D_ARRAY || + texture->target == PIPE_TEXTURE_CUBE || + texture->target == PIPE_TEXTURE_CUBE_ARRAY); + else if (view->target == PIPE_TEXTURE_2D_ARRAY) + assert(texture->target == PIPE_TEXTURE_2D || + texture->target == PIPE_TEXTURE_CUBE || + texture->target == PIPE_TEXTURE_CUBE_ARRAY); + else if (view->target == PIPE_TEXTURE_CUBE) + assert(texture->target == PIPE_TEXTURE_CUBE_ARRAY || + texture->target == PIPE_TEXTURE_2D_ARRAY); + else if (view->target == PIPE_TEXTURE_CUBE_ARRAY) + assert(texture->target == PIPE_TEXTURE_CUBE || + texture->target == PIPE_TEXTURE_2D_ARRAY); + else + assert(0); + } +#endif } return view; @@ -245,15 +275,17 @@ prepare_shader_sampling( row_stride[j] = lp_tex->row_stride[j]; img_stride[j] = lp_tex->img_stride[j]; } - if (res->target == PIPE_TEXTURE_1D_ARRAY || - res->target == PIPE_TEXTURE_2D_ARRAY || - res->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (view->target == PIPE_TEXTURE_1D_ARRAY || + view->target == PIPE_TEXTURE_2D_ARRAY || + view->target == PIPE_TEXTURE_CUBE || + view->target == PIPE_TEXTURE_CUBE_ARRAY) { num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1; for (j = first_level; j <= last_level; j++) { mip_offsets[j] += view->u.tex.first_layer * lp_tex->img_stride[j]; } - if (res->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (view->target == PIPE_TEXTURE_CUBE || + view->target == PIPE_TEXTURE_CUBE_ARRAY) { assert(num_layers % 6 == 0); } assert(view->u.tex.first_layer <= view->u.tex.last_layer); From adcf8f8a13717f7eb53b2aa86c4b56e344f2f317 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Wed, 13 May 2015 22:56:07 +0200 Subject: [PATCH 061/834] softpipe: enable ARB_texture_view Some bits were 
already there for texture views but some were missing. In particular for cube map views things needed to change a bit. For simplicity I ended up removing the separate face addr bit (just use the z bit) - cube arrays didn't use it already, so just follow the same logic there. (In theory using separate bits could allow for better hash function but I don't think anyone ever did some measurements of that so probably not worth the trouble, if we'd reintroduce it we'd certainly wanted to use the same logic for cube arrays and cube maps.) Also extend the seamless cube sampling to cube arrays - as there were no piglit failures before this is apparently untested, but things now generally work quite the same for cube textures and cube array textures so there hopefully shouldn't be any trouble... 49 new piglits, 47 pass, 2 fail (both due to fake multisampling). v2: incorporate Brian's feedback, add sampler view validation, function rename, formatting fixes. Reviewed-by: Brian Paul --- docs/GL3.txt | 2 +- docs/relnotes/10.6.0.html | 2 +- src/gallium/drivers/softpipe/sp_screen.c | 3 +- .../drivers/softpipe/sp_state_sampler.c | 12 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 273 +++++++++++------- .../drivers/softpipe/sp_tex_tile_cache.c | 11 +- .../drivers/softpipe/sp_tex_tile_cache.h | 4 +- 7 files changed, 181 insertions(+), 126 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 32b7809d10c..5a15bc55f47 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -169,7 +169,7 @@ GL 4.3, GLSL 4.30: GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe) GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30) GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample) - GL_ARB_texture_view DONE (i965, nv50, nvc0, llvmpipe) + GL_ARB_texture_view DONE (i965, nv50, nvc0, llvmpipe, softpipe) GL_ARB_vertex_attrib_binding DONE (all drivers) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 
3f69f986ccd..6d379868865 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -56,7 +56,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_pipeline_statistics_query on i965, nv50, nvc0, r600, radeonsi, softpipe
  • GL_ARB_program_interface_query (all drivers)
  • GL_ARB_texture_stencil8 on nv50, nvc0, r600, radeonsi, softpipe
  • -GL_ARB_texture_view on llvmpipe
  • +GL_ARB_texture_view on llvmpipe, softpipe
  • GL_ARB_uniform_buffer_object on freedreno
  • GL_ARB_vertex_attrib_64bit on nvc0, softpipe
  • GL_ARB_viewport_array, GL_AMD_vertex_shader_viewport_index on i965/gen6
  • diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index fc32c56b699..b3bc1773e9f 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -200,8 +200,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: return 1; case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: - case PIPE_CAP_SAMPLER_VIEW_TARGET: return 0; + case PIPE_CAP_SAMPLER_VIEW_TARGET: + return 1; case PIPE_CAP_FAKE_SW_MSAA: return 1; case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index e56fb5b1485..d7a3360713f 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -202,7 +202,7 @@ prepare_shader_sampling( struct pipe_resource *res = view->texture; int j; - if (res->target != PIPE_BUFFER) { + if (view->target != PIPE_BUFFER) { first_level = view->u.tex.first_level; last_level = view->u.tex.last_level; assert(first_level <= last_level); @@ -214,15 +214,17 @@ prepare_shader_sampling( row_stride[j] = sp_tex->stride[j]; img_stride[j] = sp_tex->img_stride[j]; } - if (res->target == PIPE_TEXTURE_1D_ARRAY || - res->target == PIPE_TEXTURE_2D_ARRAY || - res->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (view->target == PIPE_TEXTURE_1D_ARRAY || + view->target == PIPE_TEXTURE_2D_ARRAY || + view->target == PIPE_TEXTURE_CUBE || + view->target == PIPE_TEXTURE_CUBE_ARRAY) { num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1; for (j = first_level; j <= last_level; j++) { mip_offsets[j] += view->u.tex.first_layer * sp_tex->img_stride[j]; } - if (res->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (view->target == PIPE_TEXTURE_CUBE || + view->target == PIPE_TEXTURE_CUBE_ARRAY) { assert(num_layers % 6 == 0); } assert(view->u.tex.first_layer <= view->u.tex.last_layer); diff --git 
a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 68dcf57240d..21fb6b00e30 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -474,11 +474,11 @@ wrap_linear_unorm_clamp_to_edge(float s, unsigned size, /** * Do coordinate to array index conversion. For array textures. */ -static INLINE void -wrap_array_layer(float coord, unsigned size, int *layer) +static INLINE int +coord_to_layer(float coord, unsigned first_layer, unsigned last_layer) { int c = util_ifloor(coord + 0.5F); - *layer = CLAMP(c, 0, (int) size - 1); + return CLAMP(c, (int)first_layer, (int)last_layer); } @@ -757,61 +757,6 @@ get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc) } -static INLINE const float * -get_texel_cube_seamless(const struct sp_sampler_view *sp_sview, - union tex_tile_address addr, int x, int y, - float *corner) -{ - const struct pipe_resource *texture = sp_sview->base.texture; - unsigned level = addr.bits.level; - unsigned face = addr.bits.face; - int new_x, new_y, max_x; - - max_x = (int) u_minify(texture->width0, level); - - assert(texture->width0 == texture->height0); - new_x = x; - new_y = y; - - /* change the face */ - if (x < 0) { - /* - * Cheat with corners. They are difficult and I believe because we don't get - * per-pixel faces we can actually have multiple corner texels per pixel, - * which screws things up majorly in any case (as the per spec behavior is - * to average the 3 remaining texels, which we might not have). - * Hence just make sure that the 2nd coord is clamped, will simply pick the - * sample which would have fallen off the x coord, but not y coord. - * So the filter weight of the samples will be wrong, but at least this - * ensures that only valid texels near the corner are used. 
- */ - if (y < 0 || y >= max_x) { - y = CLAMP(y, 0, max_x - 1); - } - new_x = get_next_xcoord(face, 0, max_x -1, x, y); - new_y = get_next_ycoord(face, 0, max_x -1, x, y); - face = get_next_face(face, 0); - } else if (x >= max_x) { - if (y < 0 || y >= max_x) { - y = CLAMP(y, 0, max_x - 1); - } - new_x = get_next_xcoord(face, 1, max_x -1, x, y); - new_y = get_next_ycoord(face, 1, max_x -1, x, y); - face = get_next_face(face, 1); - } else if (y < 0) { - new_x = get_next_xcoord(face, 2, max_x -1, x, y); - new_y = get_next_ycoord(face, 2, max_x -1, x, y); - face = get_next_face(face, 2); - } else if (y >= max_x) { - new_x = get_next_xcoord(face, 3, max_x -1, x, y); - new_y = get_next_ycoord(face, 3, max_x -1, x, y); - face = get_next_face(face, 3); - } - - addr.bits.face = face; - return get_texel_2d_no_border( sp_sview, addr, new_x, new_y ); -} - /* Gather a quad of adjacent texels within a tile: */ static INLINE void @@ -948,6 +893,60 @@ get_texel_2d_array(const struct sp_sampler_view *sp_sview, } +static INLINE const float * +get_texel_cube_seamless(const struct sp_sampler_view *sp_sview, + union tex_tile_address addr, int x, int y, + float *corner, int layer, unsigned face) +{ + const struct pipe_resource *texture = sp_sview->base.texture; + unsigned level = addr.bits.level; + int new_x, new_y, max_x; + + max_x = (int) u_minify(texture->width0, level); + + assert(texture->width0 == texture->height0); + new_x = x; + new_y = y; + + /* change the face */ + if (x < 0) { + /* + * Cheat with corners. They are difficult and I believe because we don't get + * per-pixel faces we can actually have multiple corner texels per pixel, + * which screws things up majorly in any case (as the per spec behavior is + * to average the 3 remaining texels, which we might not have). + * Hence just make sure that the 2nd coord is clamped, will simply pick the + * sample which would have fallen off the x coord, but not y coord. 
+ * So the filter weight of the samples will be wrong, but at least this + * ensures that only valid texels near the corner are used. + */ + if (y < 0 || y >= max_x) { + y = CLAMP(y, 0, max_x - 1); + } + new_x = get_next_xcoord(face, 0, max_x -1, x, y); + new_y = get_next_ycoord(face, 0, max_x -1, x, y); + face = get_next_face(face, 0); + } else if (x >= max_x) { + if (y < 0 || y >= max_x) { + y = CLAMP(y, 0, max_x - 1); + } + new_x = get_next_xcoord(face, 1, max_x -1, x, y); + new_y = get_next_ycoord(face, 1, max_x -1, x, y); + face = get_next_face(face, 1); + } else if (y < 0) { + new_x = get_next_xcoord(face, 2, max_x -1, x, y); + new_y = get_next_ycoord(face, 2, max_x -1, x, y); + face = get_next_face(face, 2); + } else if (y >= max_x) { + new_x = get_next_xcoord(face, 3, max_x -1, x, y); + new_y = get_next_ycoord(face, 3, max_x -1, x, y); + face = get_next_face(face, 3); + } + + return get_texel_3d_no_border(sp_sview, addr, new_x, new_y, layer + face); +} + + /* Get texel pointer for cube array texture */ static INLINE const float * get_texel_cube_array(const struct sp_sampler_view *sp_sview, @@ -1208,7 +1207,8 @@ img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview, addr.bits.level = level; sp_samp->nearest_texcoord_s(s, width, &x); - wrap_array_layer(t, texture->array_size, &layer); + layer = coord_to_layer(t, sp_sview->base.u.tex.first_layer, + sp_sview->base.u.tex.last_layer); out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer); for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1287,7 +1287,8 @@ img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview, sp_samp->nearest_texcoord_s(s, width, &x); sp_samp->nearest_texcoord_t(t, height, &y); - wrap_array_layer(p, texture->array_size, &layer); + layer = coord_to_layer(p, sp_sview->base.u.tex.first_layer, + sp_sview->base.u.tex.last_layer); out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer); for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1299,14 +1300,6 @@ img_filter_2d_array_nearest(struct 
sp_sampler_view *sp_sview, } -static INLINE union tex_tile_address -face(union tex_tile_address addr, unsigned face ) -{ - addr.bits.face = face; - return addr; -} - - static void img_filter_cube_nearest(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, @@ -1319,7 +1312,7 @@ img_filter_cube_nearest(struct sp_sampler_view *sp_sview, { const struct pipe_resource *texture = sp_sview->base.texture; int width, height; - int x, y; + int x, y, layerface; union tex_tile_address addr; const float *out; int c; @@ -1346,7 +1339,8 @@ img_filter_cube_nearest(struct sp_sampler_view *sp_sview, sp_samp->nearest_texcoord_t(t, height, &y); } - out = get_texel_2d(sp_sview, sp_samp, face(addr, face_id), x, y); + layerface = face_id + sp_sview->base.u.tex.first_layer; + out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface); for (c = 0; c < TGSI_QUAD_SIZE; c++) rgba[TGSI_NUM_CHANNELS*c] = out[c]; @@ -1367,7 +1361,7 @@ img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview, { const struct pipe_resource *texture = sp_sview->base.texture; int width, height; - int x, y, layer; + int x, y, layerface; union tex_tile_address addr; const float *out; int c; @@ -1383,9 +1377,11 @@ img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview, sp_samp->nearest_texcoord_s(s, width, &x); sp_samp->nearest_texcoord_t(t, height, &y); - wrap_array_layer(p, texture->array_size, &layer); + layerface = coord_to_layer(6 * p + sp_sview->base.u.tex.first_layer, + sp_sview->base.u.tex.first_layer, + sp_sview->base.u.tex.last_layer - 5) + face_id; - out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layer * 6 + face_id); + out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface); for (c = 0; c < TGSI_QUAD_SIZE; c++) rgba[TGSI_NUM_CHANNELS*c] = out[c]; @@ -1494,7 +1490,8 @@ img_filter_1d_array_linear(struct sp_sampler_view *sp_sview, addr.bits.level = level; sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw); - wrap_array_layer(t, texture->array_size, 
&layer); + layer = coord_to_layer(t, sp_sview->base.u.tex.first_layer, + sp_sview->base.u.tex.last_layer); tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer); tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, layer); @@ -1577,7 +1574,8 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview, sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw); sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw); - wrap_array_layer(p, texture->array_size, &layer); + layer = coord_to_layer(p, sp_sview->base.u.tex.first_layer, + sp_sview->base.u.tex.last_layer); tx0 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer); tx1 = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer); @@ -1604,9 +1602,9 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview, { const struct pipe_resource *texture = sp_sview->base.texture; int width, height; - int x0, y0, x1, y1; + int x0, y0, x1, y1, layer; float xw, yw; /* weights */ - union tex_tile_address addr, addrj; + union tex_tile_address addr; const float *tx0, *tx1, *tx2, *tx3; float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE], corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE]; @@ -1635,19 +1633,20 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview, sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw); } - addrj = face(addr, face_id); + layer = sp_sview->base.u.tex.first_layer; if (sp_samp->base.seamless_cube_map) { - tx0 = get_texel_cube_seamless(sp_sview, addrj, x0, y0, corner0); - tx1 = get_texel_cube_seamless(sp_sview, addrj, x1, y0, corner1); - tx2 = get_texel_cube_seamless(sp_sview, addrj, x0, y1, corner2); - tx3 = get_texel_cube_seamless(sp_sview, addrj, x1, y1, corner3); + tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, face_id); + tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, face_id); + tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, face_id); + tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, 
face_id); } else { - tx0 = get_texel_2d(sp_sview, sp_samp, addrj, x0, y0); - tx1 = get_texel_2d(sp_sview, sp_samp, addrj, x1, y0); - tx2 = get_texel_2d(sp_sview, sp_samp, addrj, x0, y1); - tx3 = get_texel_2d(sp_sview, sp_samp, addrj, x1, y1); + tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + face_id); + tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + face_id); + tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + face_id); + tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + face_id); } + /* interpolate R, G, B, A */ for (c = 0; c < TGSI_QUAD_SIZE; c++) rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, @@ -1672,6 +1671,8 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview, float xw, yw; /* weights */ union tex_tile_address addr; const float *tx0, *tx1, *tx2, *tx3; + float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE], + corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE]; int c; width = u_minify(texture->width0, level); @@ -1683,14 +1684,35 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = level; - sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw); - sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw); - wrap_array_layer(p, texture->array_size, &layer); + /* + * For seamless if LINEAR filtering is done within a miplevel, + * always apply wrap mode CLAMP_TO_BORDER. 
+ */ + if (sp_samp->base.seamless_cube_map) { + /* Note this is a bit overkill, actual clamping is not required */ + wrap_linear_clamp_to_border(s, width, &x0, &x1, &xw); + wrap_linear_clamp_to_border(t, height, &y0, &y1, &yw); + } else { + /* Would probably make sense to ignore mode and just do edge clamp */ + sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw); + sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw); + } - tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer * 6 + face_id); - tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer * 6 + face_id); - tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer * 6 + face_id); - tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer * 6 + face_id); + layer = coord_to_layer(6 * p + sp_sview->base.u.tex.first_layer, + sp_sview->base.u.tex.first_layer, + sp_sview->base.u.tex.last_layer - 5); + + if (sp_samp->base.seamless_cube_map) { + tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, face_id); + tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, face_id); + tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, face_id); + tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, face_id); + } else { + tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + face_id); + tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + face_id); + tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + face_id); + tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + face_id); + } /* interpolate R, G, B, A */ for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -2408,13 +2430,13 @@ sample_compare(struct sp_sampler_view *sp_sview, * RGBA channels. We look at the red channel here. 
*/ - if (sp_sview->base.texture->target == PIPE_TEXTURE_2D_ARRAY || - sp_sview->base.texture->target == PIPE_TEXTURE_CUBE) { + if (sp_sview->base.target == PIPE_TEXTURE_2D_ARRAY || + sp_sview->base.target == PIPE_TEXTURE_CUBE) { pc[0] = c0[0]; pc[1] = c0[1]; pc[2] = c0[2]; pc[3] = c0[3]; - } else if (sp_sview->base.texture->target == PIPE_TEXTURE_CUBE_ARRAY) { + } else if (sp_sview->base.target == PIPE_TEXTURE_CUBE_ARRAY) { pc[0] = c1[0]; pc[1] = c1[1]; pc[2] = c1[2]; @@ -2681,7 +2703,7 @@ get_img_filter(const struct sp_sampler_view *sp_sview, const struct pipe_sampler_state *sampler, unsigned filter) { - switch (sp_sview->base.texture->target) { + switch (sp_sview->base.target) { case PIPE_BUFFER: case PIPE_TEXTURE_1D: if (filter == PIPE_TEX_FILTER_NEAREST) @@ -2907,7 +2929,7 @@ sp_get_dims(struct sp_sampler_view *sp_sview, int level, const struct pipe_sampler_view *view = &sp_sview->base; const struct pipe_resource *texture = view->texture; - if (texture->target == PIPE_BUFFER) { + if (view->target == PIPE_BUFFER) { dims[0] = (view->u.buf.last_element - view->u.buf.first_element) + 1; /* the other values are undefined, but let's avoid potential valgrind * warnings. @@ -2924,7 +2946,7 @@ sp_get_dims(struct sp_sampler_view *sp_sview, int level, dims[3] = view->u.tex.last_level - view->u.tex.first_level + 1; dims[0] = u_minify(texture->width0, level); - switch(texture->target) { + switch (view->target) { case PIPE_TEXTURE_1D_ARRAY: dims[1] = view->u.tex.last_layer - view->u.tex.first_layer + 1; /* fallthrough */ @@ -2975,13 +2997,16 @@ sp_get_texels(struct sp_sampler_view *sp_sview, addr.value = 0; /* TODO write a better test for LOD */ - addr.bits.level = lod[0]; + addr.bits.level = sp_sview->base.target == PIPE_BUFFER ? 
0 : + CLAMP(lod[0] + sp_sview->base.u.tex.first_level, + sp_sview->base.u.tex.first_level, + sp_sview->base.u.tex.last_level); width = u_minify(texture->width0, addr.bits.level); height = u_minify(texture->height0, addr.bits.level); depth = u_minify(texture->depth0, addr.bits.level); - switch(texture->target) { + switch (sp_sview->base.target) { case PIPE_BUFFER: case PIPE_TEXTURE_1D: for (j = 0; j < TGSI_QUAD_SIZE; j++) { @@ -2995,7 +3020,8 @@ sp_get_texels(struct sp_sampler_view *sp_sview, case PIPE_TEXTURE_1D_ARRAY: for (j = 0; j < TGSI_QUAD_SIZE; j++) { int x = CLAMP(v_i[j] + offset[0], 0, width - 1); - int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer); + int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer, + sp_sview->base.u.tex.last_layer); tx = get_texel_2d_no_border(sp_sview, addr, x, y); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; @@ -3017,7 +3043,8 @@ sp_get_texels(struct sp_sampler_view *sp_sview, for (j = 0; j < TGSI_QUAD_SIZE; j++) { int x = CLAMP(v_i[j] + offset[0], 0, width - 1); int y = CLAMP(v_j[j] + offset[1], 0, height - 1); - int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer); + int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer, + sp_sview->base.u.tex.last_layer); tx = get_texel_3d_no_border(sp_sview, addr, x, y, layer); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; @@ -3140,7 +3167,7 @@ softpipe_get_lambda_func(const struct pipe_sampler_view *view, unsigned shader) if (shader != PIPE_SHADER_FRAGMENT) return compute_lambda_vert; - switch (view->texture->target) { + switch (view->target) { case PIPE_BUFFER: case PIPE_TEXTURE_1D: case PIPE_TEXTURE_1D_ARRAY: @@ -3176,19 +3203,49 @@ softpipe_create_sampler_view(struct pipe_context *pipe, pipe_resource_reference(&view->texture, resource); view->context = pipe; +#ifdef DEBUG + /* + * This is possibly too lenient, but the primary reason is just + * to catch state trackers which forget to initialize this, so + 
* it only catches clearly impossible view targets. + */ + if (view->target != resource->target) { + if (view->target == PIPE_TEXTURE_1D) + assert(resource->target == PIPE_TEXTURE_1D_ARRAY); + else if (view->target == PIPE_TEXTURE_1D_ARRAY) + assert(resource->target == PIPE_TEXTURE_1D); + else if (view->target == PIPE_TEXTURE_2D) + assert(resource->target == PIPE_TEXTURE_2D_ARRAY || + resource->target == PIPE_TEXTURE_CUBE || + resource->target == PIPE_TEXTURE_CUBE_ARRAY); + else if (view->target == PIPE_TEXTURE_2D_ARRAY) + assert(resource->target == PIPE_TEXTURE_2D || + resource->target == PIPE_TEXTURE_CUBE || + resource->target == PIPE_TEXTURE_CUBE_ARRAY); + else if (view->target == PIPE_TEXTURE_CUBE) + assert(resource->target == PIPE_TEXTURE_CUBE_ARRAY || + resource->target == PIPE_TEXTURE_2D_ARRAY); + else if (view->target == PIPE_TEXTURE_CUBE_ARRAY) + assert(resource->target == PIPE_TEXTURE_CUBE || + resource->target == PIPE_TEXTURE_2D_ARRAY); + else + assert(0); + } +#endif + if (any_swizzle(view)) { sview->need_swizzle = TRUE; } - if (resource->target == PIPE_TEXTURE_CUBE || - resource->target == PIPE_TEXTURE_CUBE_ARRAY) + if (view->target == PIPE_TEXTURE_CUBE || + view->target == PIPE_TEXTURE_CUBE_ARRAY) sview->get_samples = sample_cube; else { sview->get_samples = sample_mip; } sview->pot2d = spr->pot && - (resource->target == PIPE_TEXTURE_2D || - resource->target == PIPE_TEXTURE_RECT); + (view->target == PIPE_TEXTURE_2D || + view->target == PIPE_TEXTURE_RECT); sview->xpot = util_logbase2( resource->width0 ); sview->ypot = util_logbase2( resource->height0 ); diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index ab8ba60849a..4a421a8f882 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -151,7 +151,7 @@ sp_tex_tile_cache_set_sampler_view(struct softpipe_tex_tile_cache *tc, tc->entries[i].addr.bits.invalid = 1; } - tc->tex_face 
= -1; /* any invalid value here */ + tc->tex_z = -1; /* any invalid value here */ } } @@ -172,7 +172,7 @@ sp_flush_tex_tile_cache(struct softpipe_tex_tile_cache *tc) for (pos = 0; pos < Elements(tc->entries); pos++) { tc->entries[pos].addr.bits.invalid = 1; } - tc->tex_face = -1; + tc->tex_z = -1; } } @@ -190,8 +190,7 @@ tex_cache_pos( union tex_tile_address addr ) { uint entry = (addr.bits.x + addr.bits.y * 9 + - addr.bits.z * 3 + - addr.bits.face + + addr.bits.z + addr.bits.level * 7); return entry % NUM_TEX_TILE_ENTRIES; @@ -226,7 +225,6 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, /* check if we need to get a new transfer */ if (!tc->tex_trans || - tc->tex_face != addr.bits.face || tc->tex_level != addr.bits.level || tc->tex_z != addr.bits.z) { /* get new transfer (view into texture) */ @@ -245,7 +243,7 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, } else { height = u_minify(tc->texture->height0, addr.bits.level); - layer = addr.bits.face + addr.bits.z; + layer = addr.bits.z; } tc->tex_trans_map = @@ -255,7 +253,6 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED, 0, 0, width, height, &tc->tex_trans); - tc->tex_face = addr.bits.face; tc->tex_level = addr.bits.level; tc->tex_z = addr.bits.z; } diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h index 4eb42460552..2233effc439 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -55,7 +55,6 @@ union tex_tile_address { unsigned x:TEX_ADDR_BITS; /* 16K / TILE_SIZE */ unsigned y:TEX_ADDR_BITS; /* 16K / TILE_SIZE */ unsigned z:TEX_Z_BITS; /* 16K -- z not tiled */ - unsigned face:3; unsigned level:4; unsigned invalid:1; } bits; @@ -94,7 +93,7 @@ struct softpipe_tex_tile_cache struct pipe_transfer *tex_trans; void *tex_trans_map; - int tex_face, tex_level, tex_z; + int tex_level, tex_z; unsigned 
swizzle_r; unsigned swizzle_g; @@ -141,7 +140,6 @@ tex_tile_address( unsigned x, addr.bits.x = x / TEX_TILE_SIZE; addr.bits.y = y / TEX_TILE_SIZE; addr.bits.z = z; - addr.bits.face = face; addr.bits.level = level; return addr; From 6d8eff4af7816d675f8ee91f99106fa118cda424 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Thu, 22 Jan 2015 10:23:35 -0800 Subject: [PATCH 062/834] main: Add utility function _mesa_lookup_framebuffer_err. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Fredrik: Generate an error for non-existent framebuffers] Reviewed-by: Fredrik Höglund Reviewed-by: Anuj Phogat Signed-off-by: Fredrik Höglund --- src/mesa/main/fbobject.c | 21 +++++++++++++++++++++ src/mesa/main/fbobject.h | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 27cf97f1778..3f5c0d7ef19 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -137,6 +137,27 @@ _mesa_lookup_framebuffer(struct gl_context *ctx, GLuint id) } +/** + * A convenience function for direct state access that throws + * GL_INVALID_OPERATION if the framebuffer doesn't exist. + */ +struct gl_framebuffer * +_mesa_lookup_framebuffer_err(struct gl_context *ctx, GLuint id, + const char *func) +{ + struct gl_framebuffer *fb; + + fb = _mesa_lookup_framebuffer(ctx, id); + if (!fb || fb == &DummyFramebuffer) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(non-existent framebuffer %u)", func, id); + return NULL; + } + + return fb; +} + + /** * Mark the given framebuffer as invalid. 
This will force the * test for framebuffer completeness to be done before the framebuffer diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 61aa1f50308..1f0eb0b5b78 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -67,6 +67,10 @@ _mesa_lookup_renderbuffer(struct gl_context *ctx, GLuint id); extern struct gl_framebuffer * _mesa_lookup_framebuffer(struct gl_context *ctx, GLuint id); +extern struct gl_framebuffer * +_mesa_lookup_framebuffer_err(struct gl_context *ctx, GLuint id, + const char *func); + void _mesa_update_texture_renderbuffer(struct gl_context *ctx, From f868de7d6b1370105414eb3d83b4b38b598bff66 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Fri, 23 Jan 2015 14:54:48 -0800 Subject: [PATCH 063/834] main: Add glCreateFramebuffers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Fredrik: Whitespace fixes] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 7 ++++ src/mesa/main/fbobject.c | 42 ++++++++++++++++--- src/mesa/main/fbobject.h | 3 ++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 9e0cf2d6ce1..7801ef45bb2 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -152,6 +152,13 @@ + + + + + + + diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 3f5c0d7ef19..e4cb8cc71b6 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2417,15 +2417,23 @@ _mesa_DeleteFramebuffers(GLsizei n, const GLuint *framebuffers) } -void GLAPIENTRY -_mesa_GenFramebuffers(GLsizei n, GLuint *framebuffers) +/** + * This is the implementation for glGenFramebuffers and glCreateFramebuffers. 
+ * It is not exposed to the rest of Mesa to encourage the use of + * nameless buffers in driver internals. + */ +static void +create_framebuffers(GLsizei n, GLuint *framebuffers, bool dsa) { GET_CURRENT_CONTEXT(ctx); GLuint first; GLint i; + struct gl_framebuffer *fb; + + const char *func = dsa ? "glCreateFramebuffers" : "glGenFramebuffers"; if (n < 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGenFramebuffersEXT(n)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", func); return; } @@ -2437,14 +2445,38 @@ _mesa_GenFramebuffers(GLsizei n, GLuint *framebuffers) for (i = 0; i < n; i++) { GLuint name = first + i; framebuffers[i] = name; - /* insert dummy placeholder into hash table */ + + if (dsa) { + fb = ctx->Driver.NewFramebuffer(ctx, framebuffers[i]); + if (!fb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func); + return; + } + } + else + fb = &DummyFramebuffer; + mtx_lock(&ctx->Shared->Mutex); - _mesa_HashInsert(ctx->Shared->FrameBuffers, name, &DummyFramebuffer); + _mesa_HashInsert(ctx->Shared->FrameBuffers, name, fb); mtx_unlock(&ctx->Shared->Mutex); } } +void GLAPIENTRY +_mesa_GenFramebuffers(GLsizei n, GLuint *framebuffers) +{ + create_framebuffers(n, framebuffers, false); +} + + +void GLAPIENTRY +_mesa_CreateFramebuffers(GLsizei n, GLuint *framebuffers) +{ + create_framebuffers(n, framebuffers, true); +} + + GLenum GLAPIENTRY _mesa_CheckFramebufferStatus(GLenum target) { diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 1f0eb0b5b78..37d9535018b 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -169,6 +169,9 @@ _mesa_DeleteFramebuffers(GLsizei n, const GLuint *framebuffers); extern void GLAPIENTRY _mesa_GenFramebuffers(GLsizei n, GLuint *framebuffers); +extern void GLAPIENTRY +_mesa_CreateFramebuffers(GLsizei n, GLuint *framebuffers); + extern GLenum GLAPIENTRY _mesa_CheckFramebufferStatus(GLenum target); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 
ccd0124a2bb..6e1293d98e5 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -980,6 +980,7 @@ const struct function gl_core_functions_possible[] = { { "glGetNamedBufferParameteri64v", 45, -1 }, { "glGetNamedBufferPointerv", 45, -1 }, { "glGetNamedBufferSubData", 45, -1 }, + { "glCreateFramebuffers", 45, -1 }, { "glCreateRenderbuffers", 45, -1 }, { "glNamedRenderbufferStorage", 45, -1 }, { "glNamedRenderbufferStorageMultisample", 45, -1 }, From 2bb138e7ec24b9e56715a53f9c4e911b99d4a97b Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Fri, 23 Jan 2015 16:38:36 -0800 Subject: [PATCH 064/834] main: Add utility function _mesa_lookup_renderbuffer_err. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Fredrik: Generate an error for non-existent renderbuffers] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/fbobject.c | 21 +++++++++++++++++++++ src/mesa/main/fbobject.h | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index e4cb8cc71b6..adadf3c980b 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -120,6 +120,27 @@ _mesa_lookup_renderbuffer(struct gl_context *ctx, GLuint id) } +/** + * A convenience function for direct state access that throws + * GL_INVALID_OPERATION if the renderbuffer doesn't exist. + */ +struct gl_renderbuffer * +_mesa_lookup_renderbuffer_err(struct gl_context *ctx, GLuint id, + const char *func) +{ + struct gl_renderbuffer *rb; + + rb = _mesa_lookup_renderbuffer(ctx, id); + if (!rb || rb == &DummyRenderbuffer) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(non-existent renderbuffer %u)", func, id); + return NULL; + } + + return rb; +} + + /** * Helper routine for getting a gl_framebuffer. 
*/ diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 37d9535018b..6ac16fca241 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -64,6 +64,10 @@ _mesa_get_incomplete_framebuffer(void); extern struct gl_renderbuffer * _mesa_lookup_renderbuffer(struct gl_context *ctx, GLuint id); +extern struct gl_renderbuffer * +_mesa_lookup_renderbuffer_err(struct gl_context *ctx, GLuint id, + const char *func); + extern struct gl_framebuffer * _mesa_lookup_framebuffer(struct gl_context *ctx, GLuint id); From 3d100372f15c377bb1cff3d23621f1f5958f4c53 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Fri, 27 Feb 2015 17:23:59 -0800 Subject: [PATCH 065/834] main: Rename framebuffer renderbuffer software fallback. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename _mesa_framebuffer_renderbuffer to _mesa_FramebufferRenderbuffer_sw in preparation for adding the ARB_direct_state_access backend function for FramebufferRenderbuffer and NamedFramebufferRenderbuffer to share. 
Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/drivers/common/driverfuncs.c | 2 +- src/mesa/drivers/dri/i915/intel_fbo.c | 2 +- src/mesa/drivers/dri/nouveau/nouveau_fbo.c | 2 +- src/mesa/drivers/dri/radeon/radeon_fbo.c | 2 +- src/mesa/main/fbobject.c | 7 ++++--- src/mesa/main/fbobject.h | 7 ++++--- src/mesa/state_tracker/st_cb_fbo.c | 2 +- src/mesa/swrast/s_texrender.c | 2 +- 8 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 0d094ddf4e6..71c1a763912 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -172,7 +172,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->UnmapRenderbuffer = _swrast_unmap_soft_renderbuffer; driver->RenderTexture = _swrast_render_texture; driver->FinishRenderTexture = _swrast_finish_render_texture; - driver->FramebufferRenderbuffer = _mesa_framebuffer_renderbuffer; + driver->FramebufferRenderbuffer = _mesa_FramebufferRenderbuffer_sw; driver->ValidateFramebuffer = _mesa_validate_framebuffer; driver->BlitFramebuffer = _swrast_BlitFramebuffer; diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c index 24c318049c4..a5d5c5832fb 100644 --- a/src/mesa/drivers/dri/i915/intel_fbo.c +++ b/src/mesa/drivers/dri/i915/intel_fbo.c @@ -427,7 +427,7 @@ intel_framebuffer_renderbuffer(struct gl_context * ctx, { DBG("Intel FramebufferRenderbuffer %u %u\n", fb->Name, rb ? 
rb->Name : 0); - _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); + _mesa_FramebufferRenderbuffer_sw(ctx, fb, attachment, rb); intel_draw_buffer(ctx); } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c index 6c479f5f0c6..c78d4baa124 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c @@ -242,7 +242,7 @@ static void nouveau_framebuffer_renderbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, GLenum attachment, struct gl_renderbuffer *rb) { - _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); + _mesa_FramebufferRenderbuffer_sw(ctx, fb, attachment, rb); context_dirty(ctx, FRAMEBUFFER); } diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index 97022f95953..ef62d097bae 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -723,7 +723,7 @@ radeon_framebuffer_renderbuffer(struct gl_context * ctx, "%s(%p, fb %p, rb %p) \n", __func__, ctx, fb, rb); - _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); + _mesa_FramebufferRenderbuffer_sw(ctx, fb, attachment, rb); radeon_draw_buffer(ctx, fb); } diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index adadf3c980b..f1603649f4c 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -521,9 +521,10 @@ set_renderbuffer_attachment(struct gl_context *ctx, * Attach a renderbuffer object to a framebuffer object. 
*/ void -_mesa_framebuffer_renderbuffer(struct gl_context *ctx, - struct gl_framebuffer *fb, - GLenum attachment, struct gl_renderbuffer *rb) +_mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLenum attachment, + struct gl_renderbuffer *rb) { struct gl_renderbuffer_attachment *att; diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 6ac16fca241..0c0bc0e4a66 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -82,9 +82,10 @@ _mesa_update_texture_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer_attachment *att); extern void -_mesa_framebuffer_renderbuffer(struct gl_context *ctx, - struct gl_framebuffer *fb, - GLenum attachment, struct gl_renderbuffer *rb); +_mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLenum attachment, + struct gl_renderbuffer *rb); extern void _mesa_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb); diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 296ea1e0d29..0399eef7204 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -842,7 +842,7 @@ void st_init_fbo_functions(struct dd_function_table *functions) functions->NewFramebuffer = st_new_framebuffer; functions->NewRenderbuffer = st_new_renderbuffer; functions->BindFramebuffer = st_bind_framebuffer; - functions->FramebufferRenderbuffer = _mesa_framebuffer_renderbuffer; + functions->FramebufferRenderbuffer = _mesa_FramebufferRenderbuffer_sw; functions->RenderTexture = st_render_texture; functions->FinishRenderTexture = st_finish_render_texture; functions->ValidateFramebuffer = st_validate_framebuffer; diff --git a/src/mesa/swrast/s_texrender.c b/src/mesa/swrast/s_texrender.c index fa853c9197f..4e41b3b72a8 100644 --- a/src/mesa/swrast/s_texrender.c +++ b/src/mesa/swrast/s_texrender.c @@ -72,7 +72,7 @@ update_wrapper(struct gl_context *ctx, struct 
gl_renderbuffer_attachment *att) * \param fb the framebuffer object the texture is being bound to * \param att the fb attachment point of the texture * - * \sa _mesa_framebuffer_renderbuffer + * \sa _mesa_FramebufferRenderbuffer_sw */ void _swrast_render_texture(struct gl_context *ctx, From a29318bf0a0385fa4fdedbdc3fb6e1f6f0d87884 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Fri, 27 Feb 2015 17:27:30 -0800 Subject: [PATCH 066/834] main: Add entry point for NamedFramebufferRenderbuffer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Fredrik: - Remove the DummyRenderbuffer checks now that they are done in _mesa_lookup_renderbuffer_err. - Fix the name in error messages. - Make the error message in _mesa_framebuffer_renderbuffer reflect that might not be the bound framebuffer. - Remove EXT suffixes from GL tokens.] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 7 + src/mesa/main/fbobject.c | 129 ++++++++++++------ src/mesa/main/fbobject.h | 12 ++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 105 insertions(+), 44 deletions(-) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 7801ef45bb2..f228a5286a7 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -159,6 +159,13 @@ + + + + + + + diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index f1603649f4c..ea6e273fda3 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2896,71 +2896,37 @@ _mesa_FramebufferTexture(GLenum target, GLenum attachment, } -void GLAPIENTRY -_mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, - GLenum renderbufferTarget, - GLuint renderbuffer) +void +_mesa_framebuffer_renderbuffer(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLenum attachment, + struct gl_renderbuffer *rb, + const char 
*func) { struct gl_renderbuffer_attachment *att; - struct gl_framebuffer *fb; - struct gl_renderbuffer *rb; - GET_CURRENT_CONTEXT(ctx); - - fb = get_framebuffer_target(ctx, target); - if (!fb) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferRenderbuffer(target)"); - return; - } - - if (renderbufferTarget != GL_RENDERBUFFER_EXT) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferRenderbuffer(renderbufferTarget)"); - return; - } if (_mesa_is_winsys_fbo(fb)) { /* Can't attach new renderbuffers to a window system framebuffer */ - _mesa_error(ctx, GL_INVALID_OPERATION, "glFramebufferRenderbuffer"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(window-system framebuffer)", func); return; } att = get_attachment(ctx, fb, attachment); if (att == NULL) { _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferRenderbuffer(invalid attachment %s)", + "%s(invalid attachment %s)", func, _mesa_lookup_enum_by_nr(attachment)); return; } - if (renderbuffer) { - rb = _mesa_lookup_renderbuffer(ctx, renderbuffer); - if (!rb) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferRenderbuffer(non-existant" - " renderbuffer %u)", renderbuffer); - return; - } - else if (rb == &DummyRenderbuffer) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferRenderbuffer(renderbuffer %u)", - renderbuffer); - return; - } - } - else { - /* remove renderbuffer attachment */ - rb = NULL; - } - if (attachment == GL_DEPTH_STENCIL_ATTACHMENT && rb && rb->Format != MESA_FORMAT_NONE) { /* make sure the renderbuffer is a depth/stencil format */ const GLenum baseFormat = _mesa_get_format_base_format(rb->Format); if (baseFormat != GL_DEPTH_STENCIL) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferRenderbuffer(renderbuffer" - " is not DEPTH_STENCIL format)"); + "%s(renderbuffer is not DEPTH_STENCIL format)", func); return; } } @@ -2977,6 +2943,81 @@ _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, } +void GLAPIENTRY +_mesa_FramebufferRenderbuffer(GLenum target, GLenum 
attachment, + GLenum renderbuffertarget, + GLuint renderbuffer) +{ + struct gl_framebuffer *fb; + struct gl_renderbuffer *rb; + GET_CURRENT_CONTEXT(ctx); + + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferRenderbuffer(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + + if (renderbuffertarget != GL_RENDERBUFFER) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferRenderbuffer(renderbuffertarget is not " + "GL_RENDERBUFFER)"); + return; + } + + if (renderbuffer) { + rb = _mesa_lookup_renderbuffer_err(ctx, renderbuffer, + "glFramebufferRenderbuffer"); + if (!rb) + return; + } + else { + /* remove renderbuffer attachment */ + rb = NULL; + } + + _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb, + "glFramebufferRenderbuffer"); +} + + +void GLAPIENTRY +_mesa_NamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment, + GLenum renderbuffertarget, + GLuint renderbuffer) +{ + struct gl_framebuffer *fb; + struct gl_renderbuffer *rb; + GET_CURRENT_CONTEXT(ctx); + + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferRenderbuffer"); + + if (renderbuffertarget != GL_RENDERBUFFER) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glNamedFramebufferRenderbuffer(renderbuffertarget is not " + "GL_RENDERBUFFER)"); + return; + } + + if (renderbuffer) { + rb = _mesa_lookup_renderbuffer_err(ctx, renderbuffer, + "glNamedFramebufferRenderbuffer"); + if (!rb) + return; + } + else { + /* remove renderbuffer attachment */ + rb = NULL; + } + + _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb, + "glNamedFramebufferRenderbuffer"); +} + + void GLAPIENTRY _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, GLenum pname, GLint *params) diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 0c0bc0e4a66..63b76f1ed49 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -87,6 +87,13 @@ _mesa_FramebufferRenderbuffer_sw(struct 
gl_context *ctx, GLenum attachment, struct gl_renderbuffer *rb); +extern void +_mesa_framebuffer_renderbuffer(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLenum attachment, + struct gl_renderbuffer *rb, + const char *func); + extern void _mesa_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb); @@ -206,6 +213,11 @@ _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +extern void GLAPIENTRY +_mesa_NamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment, + GLenum renderbuffertarget, + GLuint renderbuffer); + extern void GLAPIENTRY _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, GLenum pname, GLint *params); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 6e1293d98e5..32b082bcb28 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -981,6 +981,7 @@ const struct function gl_core_functions_possible[] = { { "glGetNamedBufferPointerv", 45, -1 }, { "glGetNamedBufferSubData", 45, -1 }, { "glCreateFramebuffers", 45, -1 }, + { "glNamedFramebufferRenderbuffer", 45, -1 }, { "glCreateRenderbuffers", 45, -1 }, { "glNamedRenderbufferStorage", 45, -1 }, { "glNamedRenderbufferStorageMultisample", 45, -1 }, From 8f78c6889d508d40c82229ea3fa09a78b4ea2e17 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Tue, 27 Jan 2015 14:11:13 -0800 Subject: [PATCH 067/834] main: Fix the indentation in framebuffer_texture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/fbobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index ea6e273fda3..9486ded05a0 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2731,7 +2731,8 @@ framebuffer_texture(struct 
gl_context *ctx, const char *caller, GLenum target, BUFFER_DEPTH); } else { set_texture_attachment(ctx, fb, att, texObj, textarget, - level, zoffset, layered); + level, zoffset, layered); + if (attachment == GL_DEPTH_STENCIL_ATTACHMENT) { /* Above we created a new renderbuffer and attached it to the * depth attachment point. Now attach it to the stencil attachment From f9f5c822845698482d0d81eaa64bc13c2fd8852a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Thu, 7 May 2015 20:28:23 +0200 Subject: [PATCH 068/834] main: Require that the texture exists in framebuffer_texture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generate GL_INVALID_OPERATION if the texture hasn't been created. Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson Cc: "10.4 10.5" --- src/mesa/main/fbobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 9486ded05a0..4524e51e4c0 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2603,7 +2603,7 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, GLboolean err = GL_TRUE; texObj = _mesa_lookup_texture(ctx, texture); - if (texObj != NULL) { + if (texObj != NULL && texObj->Target != 0) { if (textarget == 0) { if (layered) { /* We're being called by glFramebufferTexture() and textarget From 8ba7ad8abc7d71131e17970203c991ccb1befbe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sat, 9 May 2015 15:31:45 +0200 Subject: [PATCH 069/834] mesa: Generate GL_INVALID_VALUE in framebuffer_texture when layer < 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson Cc: "10.4 10.5" --- src/mesa/main/fbobject.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 
4524e51e4c0..20a4e86007c 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2668,6 +2668,18 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, return; } + /* Page 306 (page 328 of the PDF) of the OpenGL 4.5 (Core Profile) + * spec says: + * + * "An INVALID_VALUE error is generated if texture is non-zero + * and layer is negative." + */ + if (zoffset < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "glFramebufferTexture%s(layer %u < 0)", caller, zoffset); + return; + } + if (texObj->Target == GL_TEXTURE_3D) { const GLuint maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1); if (zoffset >= maxSize) { From 69bdc9dcb8e5d3648e8d96029d5988b8971de8dc Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Mon, 20 Apr 2015 17:21:20 +0200 Subject: [PATCH 070/834] main: Fix an error generated by FramebufferTexture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gl*FramebufferTexture should generate GL_INVALID_VALUE when the texture doesn't exist. [Fredrik: Split this change out from the next commit] Signed-off-by: Fredrik Höglund Reviewed-by: Fredrik Höglund Reviewed-by: Adam Jackson Cc: "10.4 10.5" --- src/mesa/main/fbobject.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 20a4e86007c..09dbf338a51 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2654,10 +2654,19 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, } } else { - /* can't render to a non-existant texture */ - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture%s(non existant texture)", - caller); + /* Can't render to a non-existent texture object. + * + * The OpenGL 4.5 core spec (02.02.2015) in Section 9.2 Binding and + * Managing Framebuffer Objects specifies a different error + * depending upon the calling function (PDF pages 325-328). 
+ * *FramebufferTexture (where layered = GL_TRUE) throws invalid + * value, while the other commands throw invalid operation (where + * layered = GL_FALSE). + */ + const GLenum error = layered ? GL_INVALID_VALUE : + GL_INVALID_OPERATION; + _mesa_error(ctx, error, + "%s(non-existent texture %u)", caller, texture); return; } From a245e3bdeb1cf38b56df4f208a7e20d0f0de6045 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Tue, 27 Jan 2015 16:11:52 -0800 Subject: [PATCH 071/834] main: Split framebuffer_texture. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split apart utility function framebuffer_texture to better prepare for implementing NamedFramebufferTexture and NamedFramebufferTextureLayer. This should also pave the way for some future cleanup work. [Fredrik: - Mention which limit was exceeded when <layer> is out of range. - Update a comment to reflect that <fb> might not be the bound framebuffer. - Make it clear that the error message in glFramebufferTexture*D refers to the <textarget> parameter. - Remove EXT suffixes.] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/fbobject.c | 265 ++++++++++++++++++++++++++++----------- src/mesa/main/fbobject.h | 8 ++ 2 files changed, 198 insertions(+), 75 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 09dbf338a51..553a32b2a4d 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2562,39 +2562,31 @@ reuse_framebuffer_texture_attachment(struct gl_framebuffer *fb, /** - * Common code called by glFramebufferTexture1D/2D/3D() and - * glFramebufferTextureLayer(). + * Common code called by gl*FramebufferTexture*() to retrieve the correct + * texture object pointer and check for associated errors. * * \param textarget is the textarget that was passed to the * glFramebufferTexture...() function, or 0 if the corresponding function * doesn't have a textarget parameter.
* * \param layered is true if this function was called from - * glFramebufferTexture(), false otherwise. + * gl*FramebufferTexture(), false otherwise. + * + * \param texObj where the pointer to the texture object is returned. Note + * that a successful call may return texObj = NULL. + * + * \return true if no errors, false if errors */ -static void -framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, - GLenum attachment, GLenum textarget, GLuint texture, - GLint level, GLuint zoffset, GLboolean layered) +static bool +get_texture_for_framebuffer(struct gl_context *ctx, + GLuint texture, GLenum textarget, + GLint level, GLuint zoffset, GLboolean *layered, + const char *caller, + struct gl_texture_object **texObj) { - struct gl_renderbuffer_attachment *att; - struct gl_texture_object *texObj = NULL; - struct gl_framebuffer *fb; GLenum maxLevelsTarget; - fb = get_framebuffer_target(ctx, target); - if (!fb) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferTexture%s(target=0x%x)", caller, target); - return; - } - - /* check framebuffer binding */ - if (_mesa_is_winsys_fbo(fb)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture%s", caller); - return; - } + *texObj = NULL; /* This will get returned if texture = 0. */ /* The textarget, level, and zoffset parameters are only validated if * texture is non-zero. @@ -2602,14 +2594,14 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, if (texture) { GLboolean err = GL_TRUE; - texObj = _mesa_lookup_texture(ctx, texture); - if (texObj != NULL && texObj->Target != 0) { + *texObj = _mesa_lookup_texture(ctx, texture); + if (*texObj != NULL && (*texObj)->Target != 0) { if (textarget == 0) { - if (layered) { - /* We're being called by glFramebufferTexture() and textarget + if (*layered) { + /* We're being called by gl*FramebufferTexture() and textarget * is not used. 
*/ - switch (texObj->Target) { + switch ((*texObj)->Target) { case GL_TEXTURE_3D: case GL_TEXTURE_1D_ARRAY_EXT: case GL_TEXTURE_2D_ARRAY_EXT: @@ -2627,8 +2619,8 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, * is equivalent to calling glFramebufferTexture{1D,2D}(). */ err = false; - layered = false; - textarget = texObj->Target; + *layered = false; + textarget = (*texObj)->Target; break; default: err = true; @@ -2639,18 +2631,18 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, * textarget is not used. The only legal texture types for * that function are 3D and 1D/2D arrays textures. */ - err = (texObj->Target != GL_TEXTURE_3D) && - (texObj->Target != GL_TEXTURE_1D_ARRAY_EXT) && - (texObj->Target != GL_TEXTURE_2D_ARRAY_EXT) && - (texObj->Target != GL_TEXTURE_CUBE_MAP_ARRAY) && - (texObj->Target != GL_TEXTURE_2D_MULTISAMPLE_ARRAY); + err = ((*texObj)->Target != GL_TEXTURE_3D) && + ((*texObj)->Target != GL_TEXTURE_1D_ARRAY) && + ((*texObj)->Target != GL_TEXTURE_2D_ARRAY) && + ((*texObj)->Target != GL_TEXTURE_CUBE_MAP_ARRAY) && + ((*texObj)->Target != GL_TEXTURE_2D_MULTISAMPLE_ARRAY); } } else { /* Make sure textarget is consistent with the texture's type */ - err = (texObj->Target == GL_TEXTURE_CUBE_MAP) + err = ((*texObj)->Target == GL_TEXTURE_CUBE_MAP) ? !_mesa_is_cube_face(textarget) - : (texObj->Target != textarget); + : ((*texObj)->Target != textarget); } } else { @@ -2659,22 +2651,21 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, * The OpenGL 4.5 core spec (02.02.2015) in Section 9.2 Binding and * Managing Framebuffer Objects specifies a different error * depending upon the calling function (PDF pages 325-328). - * *FramebufferTexture (where layered = GL_TRUE) throws invalid + * *FramebufferTexture (where *layered = GL_TRUE) throws invalid * value, while the other commands throw invalid operation (where - * layered = GL_FALSE). + * *layered = GL_FALSE). 
*/ - const GLenum error = layered ? GL_INVALID_VALUE : + const GLenum error = *layered ? GL_INVALID_VALUE : GL_INVALID_OPERATION; _mesa_error(ctx, error, "%s(non-existent texture %u)", caller, texture); - return; + return false; } if (err) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture%s(texture target mismatch)", - caller); - return; + "%s(invalid or mismatched texture target)", caller); + return false; } /* Page 306 (page 328 of the PDF) of the OpenGL 4.5 (Core Profile) @@ -2685,42 +2676,64 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, */ if (zoffset < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glFramebufferTexture%s(layer %u < 0)", caller, zoffset); - return; + "%s(layer %u < 0)", caller, zoffset); + return false; } - if (texObj->Target == GL_TEXTURE_3D) { + if ((*texObj)->Target == GL_TEXTURE_3D) { const GLuint maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1); if (zoffset >= maxSize) { _mesa_error(ctx, GL_INVALID_VALUE, - "glFramebufferTexture%s(zoffset)", caller); - return; + "%s(invalid zoffset %u)", caller, zoffset); + return false; } } - else if ((texObj->Target == GL_TEXTURE_1D_ARRAY_EXT) || - (texObj->Target == GL_TEXTURE_2D_ARRAY_EXT) || - (texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) || - (texObj->Target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)) { + else if (((*texObj)->Target == GL_TEXTURE_1D_ARRAY) || + ((*texObj)->Target == GL_TEXTURE_2D_ARRAY) || + ((*texObj)->Target == GL_TEXTURE_CUBE_MAP_ARRAY) || + ((*texObj)->Target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)) { if (zoffset >= ctx->Const.MaxArrayTextureLayers) { _mesa_error(ctx, GL_INVALID_VALUE, - "glFramebufferTexture%s(layer)", caller); - return; + "%s(layer %u >= GL_MAX_ARRAY_TEXTURE_LAYERS)", + caller, zoffset); + return false; } } - maxLevelsTarget = textarget ? textarget : texObj->Target; + maxLevelsTarget = textarget ? 
textarget : (*texObj)->Target; if ((level < 0) || (level >= _mesa_max_texture_levels(ctx, maxLevelsTarget))) { _mesa_error(ctx, GL_INVALID_VALUE, - "glFramebufferTexture%s(level)", caller); - return; + "%s(invalid level %d)", caller, level); + return false; } } + return true; +} + + +void +_mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum attachment, + struct gl_texture_object *texObj, GLenum textarget, + GLint level, GLuint zoffset, GLboolean layered, + const char *caller) +{ + struct gl_renderbuffer_attachment *att; + + /* The window-system framebuffer object is immutable */ + if (_mesa_is_winsys_fbo(fb)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(window-system framebuffer)", + caller); + return; + } + + /* Not a hash lookup, so we can afford to get the attachment here. */ att = get_attachment(ctx, fb, attachment); if (att == NULL) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferTexture%s(attachment)", caller); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller, + _mesa_lookup_enum_by_nr(attachment)); return; } @@ -2794,6 +2807,9 @@ _mesa_FramebufferTexture1D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + GLboolean layered = GL_FALSE; if (texture != 0) { GLboolean error; @@ -2811,14 +2827,31 @@ _mesa_FramebufferTexture1D(GLenum target, GLenum attachment, if (error) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture1D(textarget=%s)", + "glFramebufferTexture1D(invalid textarget %s)", _mesa_lookup_enum_by_nr(textarget)); return; } } - framebuffer_texture(ctx, "1D", target, attachment, textarget, texture, - level, 0, GL_FALSE); + /* Get the framebuffer object */ + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferTexture1D(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + + /* Get 
the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, textarget, level, 0, + &layered, "glFramebufferTexture1D", + &texObj)) { + /* Error already recorded */ + return; + } + + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, textarget, level, + 0, layered, "glFramebufferTexture1D"); } @@ -2827,6 +2860,9 @@ _mesa_FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + GLboolean layered = GL_FALSE; if (texture != 0) { GLboolean error; @@ -2862,14 +2898,31 @@ _mesa_FramebufferTexture2D(GLenum target, GLenum attachment, if (error) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture2D(textarget=%s)", + "glFramebufferTexture2D(invalid textarget %s)", _mesa_lookup_enum_by_nr(textarget)); return; } } - framebuffer_texture(ctx, "2D", target, attachment, textarget, texture, - level, 0, GL_FALSE); + /* Get the framebuffer object */ + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferTexture2D(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + + /* Get the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, textarget, level, 0, + &layered, "glFramebufferTexture2D", + &texObj)) { + /* Error already recorded */ + return; + } + + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, textarget, level, + 0, layered, "glFramebufferTexture2D"); } @@ -2879,15 +2932,36 @@ _mesa_FramebufferTexture3D(GLenum target, GLenum attachment, GLint level, GLint zoffset) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + GLboolean layered = GL_FALSE; if ((texture != 0) && (textarget != GL_TEXTURE_3D)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture3D(textarget)"); + "glFramebufferTexture3D(invalid textarget %s)", + _mesa_lookup_enum_by_nr(textarget)); return; } - 
framebuffer_texture(ctx, "3D", target, attachment, textarget, texture, - level, zoffset, GL_FALSE); + /* Get the framebuffer object */ + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferTexture3D(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + + /* Get the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, textarget, level, zoffset, + &layered, "glFramebufferTexture3D", + &texObj)) { + /* Error already recorded */ + return; + } + + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, textarget, level, + zoffset, layered, "glFramebufferTexture3D"); } @@ -2896,9 +2970,29 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + GLboolean layered = GL_FALSE; - framebuffer_texture(ctx, "Layer", target, attachment, 0, texture, - level, layer, GL_FALSE); + /* Get the framebuffer object */ + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferTextureLayer(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + + /* Get the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, 0, level, layer, + &layered, "glFramebufferTextureLayer", + &texObj)) { + /* Error already recorded */ + return; + } + + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, + layer, layered, "glFramebufferTextureLayer"); } @@ -2907,14 +3001,35 @@ _mesa_FramebufferTexture(GLenum target, GLenum attachment, GLuint texture, GLint level) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + GLboolean layered = GL_TRUE; - if (_mesa_has_geometry_shaders(ctx)) { - framebuffer_texture(ctx, "", target, attachment, 0, texture, - level, 0, GL_TRUE); - } else { + if (!_mesa_has_geometry_shaders(ctx)) { _mesa_error(ctx, 
GL_INVALID_OPERATION, "unsupported function (glFramebufferTexture) called"); + return; } + + /* Get the framebuffer object */ + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferTexture(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + + /* Get the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, 0, level, 0, + &layered, "glFramebufferTexture", + &texObj)) { + /* Error already recorded */ + return; + } + + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, + 0, layered, "glFramebufferTexture"); } diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 63b76f1ed49..974cd568f27 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -115,6 +115,14 @@ _mesa_detach_renderbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, const void *att); +extern void +_mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum attachment, + struct gl_texture_object *texObj, GLenum textarget, + GLint level, GLuint zoffset, GLboolean layered, + const char *caller); + + extern GLboolean GLAPIENTRY _mesa_IsRenderbuffer(GLuint renderbuffer); From a9f73f7f42081594d0ba9ff6cb7a0a743c3cad93 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Mon, 2 Mar 2015 13:41:13 -0800 Subject: [PATCH 072/834] main: Refactor get_texture_for_framebuffer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This moves a few blocks around so that the control flow is more obvious. If the texture is 0, just return true at the beginning of the function. Likewise, if the texObj is NULL, return true at the beginning of the function as well. 
[Fredrik: Fix the texObj NULL check] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/fbobject.c | 43 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 553a32b2a4d..f1c0780318c 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2585,17 +2585,34 @@ get_texture_for_framebuffer(struct gl_context *ctx, struct gl_texture_object **texObj) { GLenum maxLevelsTarget; + GLboolean err = GL_TRUE; *texObj = NULL; /* This will get returned if texture = 0. */ /* The textarget, level, and zoffset parameters are only validated if * texture is non-zero. */ - if (texture) { - GLboolean err = GL_TRUE; + if (!texture) + return true; + + *texObj = _mesa_lookup_texture(ctx, texture); + if (*texObj == NULL || (*texObj)->Target == 0) { + /* Can't render to a non-existent texture object. + * + * The OpenGL 4.5 core spec (02.02.2015) in Section 9.2 Binding and + * Managing Framebuffer Objects specifies a different error + * depending upon the calling function (PDF pages 325-328). + * *FramebufferTexture (where *layered = GL_TRUE) throws invalid + * value, while the other commands throw invalid operation (where + * *layered = GL_FALSE). + */ + const GLenum error = *layered ? GL_INVALID_VALUE : + GL_INVALID_OPERATION; + _mesa_error(ctx, error, + "%s(non-existent texture %u)", caller, texture); + return false; + } - *texObj = _mesa_lookup_texture(ctx, texture); - if (*texObj != NULL && (*texObj)->Target != 0) { if (textarget == 0) { if (*layered) { /* We're being called by gl*FramebufferTexture() and textarget @@ -2644,23 +2661,6 @@ get_texture_for_framebuffer(struct gl_context *ctx, ? !_mesa_is_cube_face(textarget) : ((*texObj)->Target != textarget); } - } - else { - /* Can't render to a non-existent texture object. 
- * - * The OpenGL 4.5 core spec (02.02.2015) in Section 9.2 Binding and - * Managing Framebuffer Objects specifies a different error - * depending upon the calling function (PDF pages 325-328). - * *FramebufferTexture (where *layered = GL_TRUE) throws invalid - * value, while the other commands throw invalid operation (where - * *layered = GL_FALSE). - */ - const GLenum error = *layered ? GL_INVALID_VALUE : - GL_INVALID_OPERATION; - _mesa_error(ctx, error, - "%s(non-existent texture %u)", caller, texture); - return false; - } if (err) { _mesa_error(ctx, GL_INVALID_OPERATION, @@ -2707,7 +2707,6 @@ get_texture_for_framebuffer(struct gl_context *ctx, "%s(invalid level %d)", caller, level); return false; } - } return true; } From a602b21f94ded038f9781cc02ab46cdaab868f14 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Mon, 2 Mar 2015 13:43:09 -0800 Subject: [PATCH 073/834] main: Fix indentation in get_texture_for_framebuffer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/fbobject.c | 170 +++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index f1c0780318c..dc730c8695c 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2613,100 +2613,100 @@ get_texture_for_framebuffer(struct gl_context *ctx, return false; } - if (textarget == 0) { - if (*layered) { - /* We're being called by gl*FramebufferTexture() and textarget - * is not used. 
- */ - switch ((*texObj)->Target) { - case GL_TEXTURE_3D: - case GL_TEXTURE_1D_ARRAY_EXT: - case GL_TEXTURE_2D_ARRAY_EXT: - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - err = false; - break; - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_2D_MULTISAMPLE: - /* These texture types are valid to pass to - * glFramebufferTexture(), but since they aren't layered, it - * is equivalent to calling glFramebufferTexture{1D,2D}(). - */ - err = false; - *layered = false; - textarget = (*texObj)->Target; - break; - default: - err = true; - break; - } - } else { - /* We're being called by glFramebufferTextureLayer() and - * textarget is not used. The only legal texture types for - * that function are 3D and 1D/2D arrays textures. - */ - err = ((*texObj)->Target != GL_TEXTURE_3D) && - ((*texObj)->Target != GL_TEXTURE_1D_ARRAY) && - ((*texObj)->Target != GL_TEXTURE_2D_ARRAY) && - ((*texObj)->Target != GL_TEXTURE_CUBE_MAP_ARRAY) && - ((*texObj)->Target != GL_TEXTURE_2D_MULTISAMPLE_ARRAY); - } + if (textarget == 0) { + if (*layered) { + /* We're being called by gl*FramebufferTexture() and textarget + * is not used. + */ + switch ((*texObj)->Target) { + case GL_TEXTURE_3D: + case GL_TEXTURE_1D_ARRAY_EXT: + case GL_TEXTURE_2D_ARRAY_EXT: + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + err = false; + break; + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE: + case GL_TEXTURE_2D_MULTISAMPLE: + /* These texture types are valid to pass to + * glFramebufferTexture(), but since they aren't layered, it + * is equivalent to calling glFramebufferTexture{1D,2D}(). + */ + err = false; + *layered = false; + textarget = (*texObj)->Target; + break; + default: + err = true; + break; } - else { - /* Make sure textarget is consistent with the texture's type */ - err = ((*texObj)->Target == GL_TEXTURE_CUBE_MAP) - ? 
!_mesa_is_cube_face(textarget) - : ((*texObj)->Target != textarget); - } - - if (err) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(invalid or mismatched texture target)", caller); - return false; + } else { + /* We're being called by glFramebufferTextureLayer() and + * textarget is not used. The only legal texture types for + * that function are 3D and 1D/2D arrays textures. + */ + err = ((*texObj)->Target != GL_TEXTURE_3D) && + ((*texObj)->Target != GL_TEXTURE_1D_ARRAY) && + ((*texObj)->Target != GL_TEXTURE_2D_ARRAY) && + ((*texObj)->Target != GL_TEXTURE_CUBE_MAP_ARRAY) && + ((*texObj)->Target != GL_TEXTURE_2D_MULTISAMPLE_ARRAY); } + } + else { + /* Make sure textarget is consistent with the texture's type */ + err = ((*texObj)->Target == GL_TEXTURE_CUBE_MAP) + ? !_mesa_is_cube_face(textarget) + : ((*texObj)->Target != textarget); + } - /* Page 306 (page 328 of the PDF) of the OpenGL 4.5 (Core Profile) - * spec says: - * - * "An INVALID_VALUE error is generated if texture is non-zero - * and layer is negative." - */ - if (zoffset < 0) { + if (err) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(invalid or mismatched texture target)", caller); + return false; + } + + /* Page 306 (page 328 of the PDF) of the OpenGL 4.5 (Core Profile) + * spec says: + * + * "An INVALID_VALUE error is generated if texture is non-zero + * and layer is negative." 
+ */ + if (zoffset < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(layer %u < 0)", caller, zoffset); + return false; + } + + if ((*texObj)->Target == GL_TEXTURE_3D) { + const GLuint maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1); + if (zoffset >= maxSize) { _mesa_error(ctx, GL_INVALID_VALUE, - "%s(layer %u < 0)", caller, zoffset); + "%s(invalid zoffset %u)", caller, zoffset); return false; } - - if ((*texObj)->Target == GL_TEXTURE_3D) { - const GLuint maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1); - if (zoffset >= maxSize) { - _mesa_error(ctx, GL_INVALID_VALUE, - "%s(invalid zoffset %u)", caller, zoffset); - return false; - } - } - else if (((*texObj)->Target == GL_TEXTURE_1D_ARRAY) || - ((*texObj)->Target == GL_TEXTURE_2D_ARRAY) || - ((*texObj)->Target == GL_TEXTURE_CUBE_MAP_ARRAY) || - ((*texObj)->Target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)) { - if (zoffset >= ctx->Const.MaxArrayTextureLayers) { - _mesa_error(ctx, GL_INVALID_VALUE, - "%s(layer %u >= GL_MAX_ARRAY_TEXTURE_LAYERS)", - caller, zoffset); - return false; - } - } - - maxLevelsTarget = textarget ? textarget : (*texObj)->Target; - if ((level < 0) || - (level >= _mesa_max_texture_levels(ctx, maxLevelsTarget))) { + } + else if (((*texObj)->Target == GL_TEXTURE_1D_ARRAY) || + ((*texObj)->Target == GL_TEXTURE_2D_ARRAY) || + ((*texObj)->Target == GL_TEXTURE_CUBE_MAP_ARRAY) || + ((*texObj)->Target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)) { + if (zoffset >= ctx->Const.MaxArrayTextureLayers) { _mesa_error(ctx, GL_INVALID_VALUE, - "%s(invalid level %d)", caller, level); + "%s(layer %u >= GL_MAX_ARRAY_TEXTURE_LAYERS)", + caller, zoffset); return false; } + } + + maxLevelsTarget = textarget ? 
textarget : (*texObj)->Target; + if ((level < 0) || + (level >= _mesa_max_texture_levels(ctx, maxLevelsTarget))) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(invalid level %d)", caller, level); + return false; + } return true; } From d78c831a147e8af6f6fc1a610f4c2e490e75fad1 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Wed, 28 Jan 2015 13:19:57 -0800 Subject: [PATCH 074/834] main: Add entry points for glNamedFramebufferTexture[Layer]. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 15 +++++ src/mesa/main/fbobject.c | 64 +++++++++++++++++++ src/mesa/main/fbobject.h | 8 +++ src/mesa/main/tests/dispatch_sanity.cpp | 2 + 4 files changed, 89 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index f228a5286a7..b8e90aadd45 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -166,6 +166,21 @@ + + + + + + + + + + + + + + + diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index dc730c8695c..8f759fba809 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2995,6 +2995,36 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, } +void GLAPIENTRY +_mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment, + GLuint texture, GLint level, GLint layer) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + GLboolean layered = GL_FALSE; + + /* Get the framebuffer object */ + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferTextureLayer"); + if (!fb) + return; + + /* Get the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, 0, level, layer, + &layered, + "glNamedFramebufferTextureLayer", + &texObj)) { + /* Error already recorded */ + return; + } + + 
_mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, + layer, layered, + "glNamedFramebufferTextureLayer"); +} + + void GLAPIENTRY _mesa_FramebufferTexture(GLenum target, GLenum attachment, GLuint texture, GLint level) @@ -3032,6 +3062,40 @@ _mesa_FramebufferTexture(GLenum target, GLenum attachment, } +void GLAPIENTRY +_mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment, + GLuint texture, GLint level) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + GLboolean layered = GL_TRUE; + + if (!_mesa_has_geometry_shaders(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "unsupported function (glNamedFramebufferTexture) called"); + return; + } + + /* Get the framebuffer object */ + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferTexture"); + if (!fb) + return; + + /* Get the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, 0, level, 0, + &layered, "glNamedFramebufferTexture", + &texObj)) { + /* Error already recorded */ + return; + } + + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, + 0, layered, "glNamedFramebufferTexture"); +} + + void _mesa_framebuffer_renderbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 974cd568f27..81159d0efa9 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -212,10 +212,18 @@ extern void GLAPIENTRY _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +extern void GLAPIENTRY +_mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment, + GLuint texture, GLint level, GLint layer); + extern void GLAPIENTRY _mesa_FramebufferTexture(GLenum target, GLenum attachment, GLuint texture, GLint level); +extern void GLAPIENTRY +_mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment, + GLuint texture, GLint level); + extern void GLAPIENTRY 
_mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, GLenum renderbuffertarget, diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 32b082bcb28..0994d437e24 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -982,6 +982,8 @@ const struct function gl_core_functions_possible[] = { { "glGetNamedBufferSubData", 45, -1 }, { "glCreateFramebuffers", 45, -1 }, { "glNamedFramebufferRenderbuffer", 45, -1 }, + { "glNamedFramebufferTexture", 45, -1 }, + { "glNamedFramebufferTextureLayer", 45, -1 }, { "glCreateRenderbuffers", 45, -1 }, { "glNamedRenderbufferStorage", 45, -1 }, { "glNamedRenderbufferStorageMultisample", 45, -1 }, From 085c67dc77ab3c00d3f68ba52f8343f8ff0cab53 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Mon, 2 Mar 2015 16:48:59 -0800 Subject: [PATCH 075/834] main: Major refactor of get_texture_for_framebuffer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This splits off the (still) rather large chunk that is get_texture_for_framebuffer into lots of smaller functions specialized to service the wide variety of unique needs of *FramebufferTexture* entry points. The result is much cleaner because, rather than having a pile of branches and confusing conditions (like the boolean layered), the uniqueness is baked into the entry points. The entry points know whether or not they are layered or use a textarget. [Fredrik: - Mention the value of in the error message. - Rename check_zoffset to check_layer, and zoffset to layer. The zoffset parameter was renamed to layer in ARB_framebuffer_object. - Make layered a GLboolean since the value is visible to the API. - Remove EXT suffixes in refactored code. - Whitespace fixes.] 
Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mapi/glapi/gen/ARB_framebuffer_object.xml | 2 +- src/mesa/main/fbobject.c | 494 ++++++++++-------- src/mesa/main/fbobject.h | 4 +- 3 files changed, 270 insertions(+), 230 deletions(-) diff --git a/src/mapi/glapi/gen/ARB_framebuffer_object.xml b/src/mapi/glapi/gen/ARB_framebuffer_object.xml index 7c547c16742..999a8ef13ad 100644 --- a/src/mapi/glapi/gen/ARB_framebuffer_object.xml +++ b/src/mapi/glapi/gen/ARB_framebuffer_object.xml @@ -247,7 +247,7 @@ - + diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 8f759fba809..807b629507b 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -465,7 +465,7 @@ set_texture_attachment(struct gl_context *ctx, struct gl_framebuffer *fb, struct gl_renderbuffer_attachment *att, struct gl_texture_object *texObj, - GLenum texTarget, GLuint level, GLuint zoffset, + GLenum texTarget, GLuint level, GLuint layer, GLboolean layered) { struct gl_renderbuffer *rb = att->Renderbuffer; @@ -489,7 +489,7 @@ set_texture_attachment(struct gl_context *ctx, /* always update these fields */ att->TextureLevel = level; att->CubeMapFace = _mesa_tex_target_to_face(texTarget); - att->Zoffset = zoffset; + att->Zoffset = layer; att->Layered = layered; att->Complete = GL_FALSE; @@ -2563,14 +2563,7 @@ reuse_framebuffer_texture_attachment(struct gl_framebuffer *fb, /** * Common code called by gl*FramebufferTexture*() to retrieve the correct - * texture object pointer and check for associated errors. - * - * \param textarget is the textarget that was passed to the - * glFramebufferTexture...() function, or 0 if the corresponding function - * doesn't have a textarget parameter. - * - * \param layered is true if this function was called from - * gl*FramebufferTexture(), false otherwise. + * texture object pointer. * * \param texObj where the pointer to the texture object is returned. Note * that a successful call may return texObj = NULL. 
@@ -2578,20 +2571,12 @@ reuse_framebuffer_texture_attachment(struct gl_framebuffer *fb, * \return true if no errors, false if errors */ static bool -get_texture_for_framebuffer(struct gl_context *ctx, - GLuint texture, GLenum textarget, - GLint level, GLuint zoffset, GLboolean *layered, - const char *caller, +get_texture_for_framebuffer(struct gl_context *ctx, GLuint texture, + bool layered, const char *caller, struct gl_texture_object **texObj) { - GLenum maxLevelsTarget; - GLboolean err = GL_TRUE; - *texObj = NULL; /* This will get returned if texture = 0. */ - /* The textarget, level, and zoffset parameters are only validated if - * texture is non-zero. - */ if (!texture) return true; @@ -2602,31 +2587,45 @@ get_texture_for_framebuffer(struct gl_context *ctx, * The OpenGL 4.5 core spec (02.02.2015) in Section 9.2 Binding and * Managing Framebuffer Objects specifies a different error * depending upon the calling function (PDF pages 325-328). - * *FramebufferTexture (where *layered = GL_TRUE) throws invalid + * *FramebufferTexture (where layered = GL_TRUE) throws invalid * value, while the other commands throw invalid operation (where - * *layered = GL_FALSE). + * layered = GL_FALSE). */ - const GLenum error = *layered ? GL_INVALID_VALUE : + const GLenum error = layered ? GL_INVALID_VALUE : GL_INVALID_OPERATION; _mesa_error(ctx, error, "%s(non-existent texture %u)", caller, texture); return false; } - if (textarget == 0) { - if (*layered) { - /* We're being called by gl*FramebufferTexture() and textarget - * is not used. - */ - switch ((*texObj)->Target) { + return true; +} + + +/** + * Common code called by gl*FramebufferTexture() to verify the texture target + * and decide whether or not the attachment should truly be considered + * layered. 
+ * + * \param layered true if attachment should be considered layered, false if + * not + * + * \return true if no errors, false if errors + */ +static bool +check_layered_texture_target(struct gl_context *ctx, GLenum target, + const char *caller, GLboolean *layered) +{ + *layered = GL_TRUE; + + switch (target) { case GL_TEXTURE_3D: case GL_TEXTURE_1D_ARRAY_EXT: case GL_TEXTURE_2D_ARRAY_EXT: case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_CUBE_MAP_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - err = false; - break; + return true; case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_RECTANGLE: @@ -2635,74 +2634,187 @@ get_texture_for_framebuffer(struct gl_context *ctx, * glFramebufferTexture(), but since they aren't layered, it * is equivalent to calling glFramebufferTexture{1D,2D}(). */ - err = false; - *layered = false; - textarget = (*texObj)->Target; - break; - default: - err = true; - break; + *layered = GL_FALSE; + return true; } - } else { + + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(invalid texture target %s)", caller, + _mesa_lookup_enum_by_nr(target)); + return false; +} + + +/** + * Common code called by gl*FramebufferTextureLayer() to verify the texture + * target. + * + * \return true if no errors, false if errors + */ +static bool +check_texture_target(struct gl_context *ctx, GLenum target, + const char *caller) +{ /* We're being called by glFramebufferTextureLayer() and * textarget is not used. The only legal texture types for * that function are 3D and 1D/2D arrays textures. 
*/ - err = ((*texObj)->Target != GL_TEXTURE_3D) && - ((*texObj)->Target != GL_TEXTURE_1D_ARRAY) && - ((*texObj)->Target != GL_TEXTURE_2D_ARRAY) && - ((*texObj)->Target != GL_TEXTURE_CUBE_MAP_ARRAY) && - ((*texObj)->Target != GL_TEXTURE_2D_MULTISAMPLE_ARRAY); + switch (target) { + case GL_TEXTURE_3D: + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_CUBE_MAP_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return true; + } + + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(invalid texture target %s)", caller, + _mesa_lookup_enum_by_nr(target)); + return false; +} + + +/** + * Common code called by glFramebufferTexture*D() to verify the texture + * target. + * + * \return true if no errors, false if errors + */ +static bool +check_textarget(struct gl_context *ctx, int dims, GLenum target, + GLenum textarget, const char *caller) +{ + bool err = false; + + switch (dims) { + case 1: + switch (textarget) { + case GL_TEXTURE_1D: + break; + case GL_TEXTURE_1D_ARRAY: + err = !ctx->Extensions.EXT_texture_array; + break; + default: + err = true; } - } - else { - /* Make sure textarget is consistent with the texture's type */ - err = ((*texObj)->Target == GL_TEXTURE_CUBE_MAP) - ? 
!_mesa_is_cube_face(textarget) - : ((*texObj)->Target != textarget); + break; + case 2: + switch (textarget) { + case GL_TEXTURE_2D: + break; + case GL_TEXTURE_RECTANGLE: + err = _mesa_is_gles(ctx) + || !ctx->Extensions.NV_texture_rectangle; + break; + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + err = !ctx->Extensions.ARB_texture_cube_map; + break; + case GL_TEXTURE_2D_ARRAY: + err = (_mesa_is_gles(ctx) && ctx->Version < 30) + || !ctx->Extensions.EXT_texture_array; + break; + case GL_TEXTURE_2D_MULTISAMPLE: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + err = _mesa_is_gles(ctx) + || !ctx->Extensions.ARB_texture_multisample; + break; + default: + err = true; + } + break; + case 3: + if (textarget != GL_TEXTURE_3D) + err = true; + break; + default: + err = true; } if (err) { _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(invalid or mismatched texture target)", caller); + "%s(invalid textarget %s)", + caller, _mesa_lookup_enum_by_nr(textarget)); return false; } + /* Make sure textarget is consistent with the texture's type */ + err = (target == GL_TEXTURE_CUBE_MAP) ? + !_mesa_is_cube_face(textarget): (target != textarget); + + if (err) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(mismatched texture target)", caller); + return false; + } + + return true; +} + + +/** + * Common code called by gl*FramebufferTextureLayer() and + * glFramebufferTexture3D() to validate the layer. + * + * \return true if no errors, false if errors + */ +static bool +check_layer(struct gl_context *ctx, GLenum target, GLint layer, + const char *caller) +{ /* Page 306 (page 328 of the PDF) of the OpenGL 4.5 (Core Profile) * spec says: * * "An INVALID_VALUE error is generated if texture is non-zero * and layer is negative." 
*/ - if (zoffset < 0) { + if (layer < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "%s(layer %u < 0)", caller, zoffset); + "%s(layer %u < 0)", caller, layer); return false; } - if ((*texObj)->Target == GL_TEXTURE_3D) { + if (target == GL_TEXTURE_3D) { const GLuint maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1); - if (zoffset >= maxSize) { + if (layer >= maxSize) { _mesa_error(ctx, GL_INVALID_VALUE, - "%s(invalid zoffset %u)", caller, zoffset); + "%s(invalid layer %u)", caller, layer); return false; } } - else if (((*texObj)->Target == GL_TEXTURE_1D_ARRAY) || - ((*texObj)->Target == GL_TEXTURE_2D_ARRAY) || - ((*texObj)->Target == GL_TEXTURE_CUBE_MAP_ARRAY) || - ((*texObj)->Target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)) { - if (zoffset >= ctx->Const.MaxArrayTextureLayers) { + else if ((target == GL_TEXTURE_1D_ARRAY) || + (target == GL_TEXTURE_2D_ARRAY) || + (target == GL_TEXTURE_CUBE_MAP_ARRAY) || + (target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)) { + if (layer >= ctx->Const.MaxArrayTextureLayers) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(layer %u >= GL_MAX_ARRAY_TEXTURE_LAYERS)", - caller, zoffset); + caller, layer); return false; } } - maxLevelsTarget = textarget ? textarget : (*texObj)->Target; + return true; +} + + +/** + * Common code called by all gl*FramebufferTexture*() entry points to verify + * the level. 
+ * + * \return true if no errors, false if errors + */ +static bool +check_level(struct gl_context *ctx, GLenum target, GLint level, + const char *caller) +{ if ((level < 0) || - (level >= _mesa_max_texture_levels(ctx, maxLevelsTarget))) { + (level >= _mesa_max_texture_levels(ctx, target))) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(invalid level %d)", caller, level); return false; @@ -2716,7 +2828,7 @@ void _mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb, GLenum attachment, struct gl_texture_object *texObj, GLenum textarget, - GLint level, GLuint zoffset, GLboolean layered, + GLint level, GLuint layer, GLboolean layered, const char *caller) { struct gl_renderbuffer_attachment *att; @@ -2745,7 +2857,7 @@ _mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb, level == fb->Attachment[BUFFER_STENCIL].TextureLevel && _mesa_tex_target_to_face(textarget) == fb->Attachment[BUFFER_STENCIL].CubeMapFace && - zoffset == fb->Attachment[BUFFER_STENCIL].Zoffset) { + layer == fb->Attachment[BUFFER_STENCIL].Zoffset) { /* The texture object is already attached to the stencil attachment * point. Don't create a new renderbuffer; just reuse the stencil * attachment's. This is required to prevent a GL error in @@ -2758,13 +2870,13 @@ _mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb, level == fb->Attachment[BUFFER_DEPTH].TextureLevel && _mesa_tex_target_to_face(textarget) == fb->Attachment[BUFFER_DEPTH].CubeMapFace && - zoffset == fb->Attachment[BUFFER_DEPTH].Zoffset) { + layer == fb->Attachment[BUFFER_DEPTH].Zoffset) { /* As above, but with depth and stencil transposed. 
*/ reuse_framebuffer_texture_attachment(fb, BUFFER_STENCIL, BUFFER_DEPTH); } else { set_texture_attachment(ctx, fb, att, texObj, textarget, - level, zoffset, layered); + level, layer, layered); if (attachment == GL_DEPTH_STENCIL_ATTACHMENT) { /* Above we created a new renderbuffer and attached it to the @@ -2801,56 +2913,50 @@ _mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb, } -void GLAPIENTRY -_mesa_FramebufferTexture1D(GLenum target, GLenum attachment, - GLenum textarget, GLuint texture, GLint level) +static void +framebuffer_texture_with_dims(int dims, GLenum target, + GLenum attachment, GLenum textarget, + GLuint texture, GLint level, GLint layer, + const char *caller) { GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; struct gl_texture_object *texObj; - GLboolean layered = GL_FALSE; - - if (texture != 0) { - GLboolean error; - - switch (textarget) { - case GL_TEXTURE_1D: - error = GL_FALSE; - break; - case GL_TEXTURE_1D_ARRAY: - error = !ctx->Extensions.EXT_texture_array; - break; - default: - error = GL_TRUE; - } - - if (error) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture1D(invalid textarget %s)", - _mesa_lookup_enum_by_nr(textarget)); - return; - } - } /* Get the framebuffer object */ fb = get_framebuffer_target(ctx, target); if (!fb) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferTexture1D(invalid target %s)", + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", caller, _mesa_lookup_enum_by_nr(target)); return; } /* Get the texture object */ - if (!get_texture_for_framebuffer(ctx, texture, textarget, level, 0, - &layered, "glFramebufferTexture1D", - &texObj)) { - /* Error already recorded */ + if (!get_texture_for_framebuffer(ctx, texture, false, caller, &texObj)) return; + + if (texObj) { + if (!check_textarget(ctx, dims, texObj->Target, textarget, caller)) + return; + + if ((dims == 3) && !check_layer(ctx, texObj->Target, layer, caller)) + return; } + if (!check_level(ctx, textarget, 
level, caller)) + return; + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, textarget, level, - 0, layered, "glFramebufferTexture1D"); + layer, GL_FALSE, caller); +} + + +void GLAPIENTRY +_mesa_FramebufferTexture1D(GLenum target, GLenum attachment, + GLenum textarget, GLuint texture, GLint level) +{ + framebuffer_texture_with_dims(1, target, attachment, textarget, texture, + level, 0, "glFramebufferTexture1D"); } @@ -2858,109 +2964,18 @@ void GLAPIENTRY _mesa_FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level) { - GET_CURRENT_CONTEXT(ctx); - struct gl_framebuffer *fb; - struct gl_texture_object *texObj; - GLboolean layered = GL_FALSE; - - if (texture != 0) { - GLboolean error; - - switch (textarget) { - case GL_TEXTURE_2D: - error = GL_FALSE; - break; - case GL_TEXTURE_RECTANGLE: - error = _mesa_is_gles(ctx) - || !ctx->Extensions.NV_texture_rectangle; - break; - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - error = !ctx->Extensions.ARB_texture_cube_map; - break; - case GL_TEXTURE_2D_ARRAY: - error = (_mesa_is_gles(ctx) && ctx->Version < 30) - || !ctx->Extensions.EXT_texture_array; - break; - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - error = _mesa_is_gles(ctx) - || !ctx->Extensions.ARB_texture_multisample; - break; - default: - error = GL_TRUE; - } - - if (error) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture2D(invalid textarget %s)", - _mesa_lookup_enum_by_nr(textarget)); - return; - } - } - - /* Get the framebuffer object */ - fb = get_framebuffer_target(ctx, target); - if (!fb) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferTexture2D(invalid target %s)", - _mesa_lookup_enum_by_nr(target)); - return; - } - - /* Get the texture object */ - if 
(!get_texture_for_framebuffer(ctx, texture, textarget, level, 0, - &layered, "glFramebufferTexture2D", - &texObj)) { - /* Error already recorded */ - return; - } - - _mesa_framebuffer_texture(ctx, fb, attachment, texObj, textarget, level, - 0, layered, "glFramebufferTexture2D"); + framebuffer_texture_with_dims(2, target, attachment, textarget, texture, + level, 0, "glFramebufferTexture2D"); } void GLAPIENTRY _mesa_FramebufferTexture3D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, - GLint level, GLint zoffset) + GLint level, GLint layer) { - GET_CURRENT_CONTEXT(ctx); - struct gl_framebuffer *fb; - struct gl_texture_object *texObj; - GLboolean layered = GL_FALSE; - - if ((texture != 0) && (textarget != GL_TEXTURE_3D)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture3D(invalid textarget %s)", - _mesa_lookup_enum_by_nr(textarget)); - return; - } - - /* Get the framebuffer object */ - fb = get_framebuffer_target(ctx, target); - if (!fb) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferTexture3D(invalid target %s)", - _mesa_lookup_enum_by_nr(target)); - return; - } - - /* Get the texture object */ - if (!get_texture_for_framebuffer(ctx, texture, textarget, level, zoffset, - &layered, "glFramebufferTexture3D", - &texObj)) { - /* Error already recorded */ - return; - } - - _mesa_framebuffer_texture(ctx, fb, attachment, texObj, textarget, level, - zoffset, layered, "glFramebufferTexture3D"); + framebuffer_texture_with_dims(3, target, attachment, textarget, texture, + level, layer, "glFramebufferTexture3D"); } @@ -2971,7 +2986,8 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; struct gl_texture_object *texObj; - GLboolean layered = GL_FALSE; + + const char *func = "glFramebufferTextureLayer"; /* Get the framebuffer object */ fb = get_framebuffer_target(ctx, target); @@ -2983,15 +2999,22 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, } /* Get 
the texture object */ - if (!get_texture_for_framebuffer(ctx, texture, 0, level, layer, - &layered, "glFramebufferTextureLayer", - &texObj)) { - /* Error already recorded */ + if (!get_texture_for_framebuffer(ctx, texture, false, func, &texObj)) return; + + if (texObj) { + if (!check_texture_target(ctx, texObj->Target, func)) + return; + + if (!check_layer(ctx, texObj->Target, layer, func)) + return; + + if (!check_level(ctx, texObj->Target, level, func)) + return; } _mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, - layer, layered, "glFramebufferTextureLayer"); + layer, GL_FALSE, func); } @@ -3002,26 +3025,31 @@ _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; struct gl_texture_object *texObj; - GLboolean layered = GL_FALSE; + + const char *func = "glNamedFramebufferTextureLayer"; /* Get the framebuffer object */ - fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, - "glNamedFramebufferTextureLayer"); + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, func); if (!fb) return; /* Get the texture object */ - if (!get_texture_for_framebuffer(ctx, texture, 0, level, layer, - &layered, - "glNamedFramebufferTextureLayer", - &texObj)) { - /* Error already recorded */ + if (!get_texture_for_framebuffer(ctx, texture, false, func, &texObj)) return; + + if (texObj) { + if (!check_texture_target(ctx, texObj->Target, func)) + return; + + if (!check_layer(ctx, texObj->Target, layer, func)) + return; + + if (!check_level(ctx, texObj->Target, level, func)) + return; } _mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, - layer, layered, - "glNamedFramebufferTextureLayer"); + layer, GL_FALSE, func); } @@ -3032,7 +3060,9 @@ _mesa_FramebufferTexture(GLenum target, GLenum attachment, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; struct gl_texture_object *texObj; - GLboolean layered = GL_TRUE; + GLboolean layered; + + const char *func = "FramebufferTexture"; if 
(!_mesa_has_geometry_shaders(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, @@ -3050,15 +3080,19 @@ _mesa_FramebufferTexture(GLenum target, GLenum attachment, } /* Get the texture object */ - if (!get_texture_for_framebuffer(ctx, texture, 0, level, 0, - &layered, "glFramebufferTexture", - &texObj)) { - /* Error already recorded */ + if (!get_texture_for_framebuffer(ctx, texture, true, func, &texObj)) return; + + if (texObj) { + if (!check_layered_texture_target(ctx, texObj->Target, func, &layered)) + return; + + if (!check_level(ctx, texObj->Target, level, func)) + return; } _mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, - 0, layered, "glFramebufferTexture"); + 0, layered, func); } @@ -3069,7 +3103,9 @@ _mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; struct gl_texture_object *texObj; - GLboolean layered = GL_TRUE; + GLboolean layered; + + const char *func = "glNamedFramebufferTexture"; if (!_mesa_has_geometry_shaders(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, @@ -3078,21 +3114,25 @@ _mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment, } /* Get the framebuffer object */ - fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, - "glNamedFramebufferTexture"); + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, func); if (!fb) return; /* Get the texture object */ - if (!get_texture_for_framebuffer(ctx, texture, 0, level, 0, - &layered, "glNamedFramebufferTexture", - &texObj)) { - /* Error already recorded */ + if (!get_texture_for_framebuffer(ctx, texture, true, func, &texObj)) return; + + if (texObj) { + if (!check_layered_texture_target(ctx, texObj->Target, func, + &layered)) + return; + + if (!check_level(ctx, texObj->Target, level, func)) + return; } _mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, - 0, layered, "glNamedFramebufferTexture"); + 0, layered, func); } diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 
81159d0efa9..e68762b429e 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -119,7 +119,7 @@ extern void _mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb, GLenum attachment, struct gl_texture_object *texObj, GLenum textarget, - GLint level, GLuint zoffset, GLboolean layered, + GLint level, GLuint layer, GLboolean layered, const char *caller); @@ -206,7 +206,7 @@ _mesa_FramebufferTexture2D(GLenum target, GLenum attachment, extern void GLAPIENTRY _mesa_FramebufferTexture3D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, - GLint level, GLint zoffset); + GLint level, GLint layer); extern void GLAPIENTRY _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, From 80e9bf2641d3bc2504fc12977c3e6a1ab7a3f49f Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Mon, 2 Mar 2015 16:52:55 -0800 Subject: [PATCH 076/834] main: Fix indents in former get_texture_for_framebuffer functions. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/fbobject.c | 78 ++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 807b629507b..4da77a33706 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2618,30 +2618,30 @@ check_layered_texture_target(struct gl_context *ctx, GLenum target, { *layered = GL_TRUE; - switch (target) { - case GL_TEXTURE_3D: - case GL_TEXTURE_1D_ARRAY_EXT: - case GL_TEXTURE_2D_ARRAY_EXT: - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - return true; - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_2D_MULTISAMPLE: - /* These texture types are valid to pass to - * glFramebufferTexture(), but since they aren't layered, it - * is equivalent to calling 
glFramebufferTexture{1D,2D}(). - */ - *layered = GL_FALSE; - return true; - } + switch (target) { + case GL_TEXTURE_3D: + case GL_TEXTURE_1D_ARRAY_EXT: + case GL_TEXTURE_2D_ARRAY_EXT: + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return true; + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE: + case GL_TEXTURE_2D_MULTISAMPLE: + /* These texture types are valid to pass to + * glFramebufferTexture(), but since they aren't layered, it + * is equivalent to calling glFramebufferTexture{1D,2D}(). + */ + *layered = GL_FALSE; + return true; + } - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(invalid texture target %s)", caller, - _mesa_lookup_enum_by_nr(target)); - return false; + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(invalid texture target %s)", caller, + _mesa_lookup_enum_by_nr(target)); + return false; } @@ -2655,23 +2655,23 @@ static bool check_texture_target(struct gl_context *ctx, GLenum target, const char *caller) { - /* We're being called by glFramebufferTextureLayer() and - * textarget is not used. The only legal texture types for - * that function are 3D and 1D/2D arrays textures. - */ - switch (target) { - case GL_TEXTURE_3D: - case GL_TEXTURE_1D_ARRAY: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_CUBE_MAP_ARRAY: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - return true; - } + /* We're being called by glFramebufferTextureLayer() and + * textarget is not used. The only legal texture types for + * that function are 3D and 1D/2D arrays textures. 
+ */ + switch (target) { + case GL_TEXTURE_3D: + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_CUBE_MAP_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return true; + } - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(invalid texture target %s)", caller, - _mesa_lookup_enum_by_nr(target)); - return false; + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(invalid texture target %s)", caller, + _mesa_lookup_enum_by_nr(target)); + return false; } From f93f95928d39b13e6c263b480b3e4bfdfa218df8 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Thu, 29 Jan 2015 13:15:37 -0800 Subject: [PATCH 077/834] main: Add entry point for CheckNamedFramebufferStatus. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Fredrik: - Retain the debugging code in CheckFramebufferStatus. - Whitespace fixes.] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 6 ++ src/mesa/main/fbobject.c | 79 +++++++++++++++---- src/mesa/main/fbobject.h | 7 ++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 78 insertions(+), 15 deletions(-) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index b8e90aadd45..1748950d120 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -181,6 +181,12 @@ + + + + + + diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 4da77a33706..ad7a85ccd9b 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2499,24 +2499,12 @@ _mesa_CreateFramebuffers(GLsizei n, GLuint *framebuffers) } -GLenum GLAPIENTRY -_mesa_CheckFramebufferStatus(GLenum target) +GLenum +_mesa_check_framebuffer_status(struct gl_context *ctx, + struct gl_framebuffer *buffer) { - struct gl_framebuffer *buffer; - GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0); - if (MESA_VERBOSE & VERBOSE_API) - 
_mesa_debug(ctx, "glCheckFramebufferStatus(%s)\n", - _mesa_lookup_enum_by_nr(target)); - - buffer = get_framebuffer_target(ctx, target); - if (!buffer) { - _mesa_error(ctx, GL_INVALID_ENUM, "glCheckFramebufferStatus(target)"); - return 0; - } - if (_mesa_is_winsys_fbo(buffer)) { /* EGL_KHR_surfaceless_context allows the winsys FBO to be incomplete. */ if (buffer != &IncompleteFramebuffer) { @@ -2536,6 +2524,67 @@ _mesa_CheckFramebufferStatus(GLenum target) } +GLenum GLAPIENTRY +_mesa_CheckFramebufferStatus(GLenum target) +{ + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glCheckFramebufferStatus(%s)\n", + _mesa_lookup_enum_by_nr(target)); + + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glCheckFramebufferStatus(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return 0; + } + + return _mesa_check_framebuffer_status(ctx, fb); +} + + +GLenum GLAPIENTRY +_mesa_CheckNamedFramebufferStatus(GLuint framebuffer, GLenum target) +{ + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + /* Validate the target (for conformance's sake) and grab a reference to the + * default framebuffer in case framebuffer = 0. + * Section 9.4 Framebuffer Completeness of the OpenGL 4.5 core spec + * (30.10.2014, PDF page 336) says: + * "If framebuffer is zero, then the status of the default read or + * draw framebuffer (as determined by target) is returned." 
+ */ + switch (target) { + case GL_DRAW_FRAMEBUFFER: + case GL_FRAMEBUFFER: + fb = ctx->WinSysDrawBuffer; + break; + case GL_READ_FRAMEBUFFER: + fb = ctx->WinSysReadBuffer; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, + "glCheckNamedFramebufferStatus(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return 0; + } + + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glCheckNamedFramebufferStatus"); + if (!fb) + return 0; + } + + return _mesa_check_framebuffer_status(ctx, fb); +} + + /** * Replicate the src attachment point. Used by framebuffer_texture() when * the same texture is attached at GL_DEPTH_ATTACHMENT and diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index e68762b429e..871a2cc280e 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -122,6 +122,10 @@ _mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb, GLint level, GLuint layer, GLboolean layered, const char *caller); +extern GLenum +_mesa_check_framebuffer_status(struct gl_context *ctx, + struct gl_framebuffer *fb); + extern GLboolean GLAPIENTRY _mesa_IsRenderbuffer(GLuint renderbuffer); @@ -195,6 +199,9 @@ _mesa_CreateFramebuffers(GLsizei n, GLuint *framebuffers); extern GLenum GLAPIENTRY _mesa_CheckFramebufferStatus(GLenum target); +extern GLenum GLAPIENTRY +_mesa_CheckNamedFramebufferStatus(GLuint framebuffer, GLenum target); + extern void GLAPIENTRY _mesa_FramebufferTexture1D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 0994d437e24..fdfad348a6e 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -984,6 +984,7 @@ const struct function gl_core_functions_possible[] = { { "glNamedFramebufferRenderbuffer", 45, -1 }, { "glNamedFramebufferTexture", 45, -1 }, { "glNamedFramebufferTextureLayer", 45, -1 }, + { 
"glCheckNamedFramebufferStatus", 45, -1 }, { "glCreateRenderbuffers", 45, -1 }, { "glNamedRenderbufferStorage", 45, -1 }, { "glNamedRenderbufferStorageMultisample", 45, -1 }, From f22fa307de780723e182d62a03c2c4c4f8a937f7 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Thu, 29 Jan 2015 17:11:37 -0800 Subject: [PATCH 078/834] main: Add entry point GetNamedFramebufferAttachmentParameteriv. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Fredrik: - Update one of the error messages to reflect that the framebuffer might not be the bound framebuffer. - Whitespace fixes.] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 7 + src/mesa/main/fbobject.c | 124 ++++++++++++------ src/mesa/main/fbobject.h | 10 ++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 101 insertions(+), 41 deletions(-) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 1748950d120..beb6249a6f7 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -187,6 +187,13 @@ + + + + + + + diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index ad7a85ccd9b..15878d327c6 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -3307,25 +3307,18 @@ _mesa_NamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment, } -void GLAPIENTRY -_mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, - GLenum pname, GLint *params) +void +_mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, + struct gl_framebuffer *buffer, + GLenum attachment, GLenum pname, + GLint *params, const char *caller) { const struct gl_renderbuffer_attachment *att; - struct gl_framebuffer *buffer; GLenum err; - GET_CURRENT_CONTEXT(ctx); /* The error differs in GL and GLES. */ err = _mesa_is_desktop_gl(ctx) ? 
GL_INVALID_OPERATION : GL_INVALID_ENUM; - buffer = get_framebuffer_target(ctx, target); - if (!buffer) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glGetFramebufferAttachmentParameteriv(target)"); - return; - } - if (_mesa_is_winsys_fbo(buffer)) { /* Page 126 (page 136 of the PDF) of the OpenGL ES 2.0.25 spec * says: @@ -3341,14 +3334,15 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, !ctx->Extensions.ARB_framebuffer_object) && !_mesa_is_gles3(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetFramebufferAttachmentParameteriv(bound FBO = 0)"); + "%s(window-system framebuffer)", caller); return; } if (_mesa_is_gles3(ctx) && attachment != GL_BACK && attachment != GL_DEPTH && attachment != GL_STENCIL) { _mesa_error(ctx, GL_INVALID_ENUM, - "glGetFramebufferAttachmentParameteriv(attachment)"); + "%s(invalid attachment %s)", caller, + _mesa_lookup_enum_by_nr(attachment)); return; } /* the default / window-system FBO */ @@ -3360,8 +3354,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, } if (att == NULL) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glGetFramebufferAttachmentParameteriv(attachment)"); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller, + _mesa_lookup_enum_by_nr(attachment)); return; } @@ -3375,9 +3369,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, * attachment, since it does not have a single format." 
*/ _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetFramebufferAttachmentParameteriv(" - "GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE" - " is invalid for depth+stencil attachment)"); + "%s(GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE" + " is invalid for depth+stencil attachment)", caller); return; } /* the depth and stencil attachments must point to the same buffer */ @@ -3385,8 +3378,7 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, stencilAtt = get_attachment(ctx, buffer, GL_STENCIL_ATTACHMENT); if (depthAtt->Renderbuffer != stencilAtt->Renderbuffer) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetFramebufferAttachmentParameteriv(DEPTH/STENCIL" - " attachments differ)"); + "%s(DEPTH/STENCIL attachments differ)", caller); return; } } @@ -3419,8 +3411,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, *params = att->TextureLevel; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else { goto invalid_pname_enum; @@ -3436,8 +3428,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else { goto invalid_pname_enum; @@ -3447,8 +3439,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, if (ctx->API == API_OPENGLES) { goto invalid_pname_enum; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else if (att->Type == GL_TEXTURE) { if (att->Texture && (att->Texture->Target == GL_TEXTURE_3D || att->Texture->Target == GL_TEXTURE_2D_ARRAY)) { @@ -3469,8 +3461,8 @@ 
_mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, goto invalid_pname_enum; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else { if (ctx->Extensions.EXT_framebuffer_sRGB) { @@ -3492,8 +3484,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, goto invalid_pname_enum; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else { mesa_format format = att->Renderbuffer->Format; @@ -3508,9 +3500,9 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, if (_mesa_is_gles3(ctx) && attachment == GL_DEPTH_STENCIL_ATTACHMENT) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetFramebufferAttachmentParameteriv(cannot query " + "%s(cannot query " "GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE of " - "GL_DEPTH_STENCIL_ATTACHMENT"); + "GL_DEPTH_STENCIL_ATTACHMENT)", caller); return; } @@ -3544,8 +3536,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, goto invalid_pname_enum; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else if (att->Texture) { const struct gl_texture_image *texImage = @@ -3564,8 +3556,7 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, att->Renderbuffer->Format); } else { - _mesa_problem(ctx, "glGetFramebufferAttachmentParameterivEXT:" - " invalid FBO attachment structure"); + _mesa_problem(ctx, "%s: invalid FBO attachment structure", caller); } return; case GL_FRAMEBUFFER_ATTACHMENT_LAYERED: @@ -3574,8 +3565,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum 
attachment, } else if (att->Type == GL_TEXTURE) { *params = att->Layered; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else { goto invalid_pname_enum; } @@ -3587,12 +3578,63 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, return; invalid_pname_enum: - _mesa_error(ctx, GL_INVALID_ENUM, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); return; } +void GLAPIENTRY +_mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, + GLenum pname, GLint *params) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *buffer; + + buffer = get_framebuffer_target(ctx, target); + if (!buffer) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetFramebufferAttachmentParameteriv(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + + _mesa_get_framebuffer_attachment_parameter(ctx, buffer, attachment, pname, + params, + "glGetFramebufferAttachmentParameteriv"); +} + + +void GLAPIENTRY +_mesa_GetNamedFramebufferAttachmentParameteriv(GLuint framebuffer, + GLenum attachment, + GLenum pname, GLint *params) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *buffer; + + if (framebuffer) { + buffer = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glGetNamedFramebufferAttachmentParameteriv"); + if (!buffer) + return; + } + else { + /* + * Section 9.2 Binding and Managing Framebuffer Objects of the OpenGL + * 4.5 core spec (30.10.2014, PDF page 314): + * "If framebuffer is zero, then the default draw framebuffer is + * queried." 
+ */ + buffer = ctx->WinSysDrawBuffer; + } + + _mesa_get_framebuffer_attachment_parameter(ctx, buffer, attachment, pname, + params, + "glGetNamedFramebufferAttachmentParameteriv"); +} + + static void invalidate_framebuffer_storage(GLenum target, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 871a2cc280e..71392037119 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -126,6 +126,12 @@ extern GLenum _mesa_check_framebuffer_status(struct gl_context *ctx, struct gl_framebuffer *fb); +extern void +_mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, + struct gl_framebuffer *buffer, + GLenum attachment, GLenum pname, + GLint *params, const char *caller); + extern GLboolean GLAPIENTRY _mesa_IsRenderbuffer(GLuint renderbuffer); @@ -244,6 +250,10 @@ _mesa_NamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment, extern void GLAPIENTRY _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, GLenum pname, GLint *params); +extern void GLAPIENTRY +_mesa_GetNamedFramebufferAttachmentParameteriv(GLuint framebuffer, + GLenum attachment, + GLenum pname, GLint *params); extern void GLAPIENTRY _mesa_InvalidateSubFramebuffer(GLenum target, GLsizei numAttachments, diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index fdfad348a6e..f3aff5f9fa8 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -985,6 +985,7 @@ const struct function gl_core_functions_possible[] = { { "glNamedFramebufferTexture", 45, -1 }, { "glNamedFramebufferTextureLayer", 45, -1 }, { "glCheckNamedFramebufferStatus", 45, -1 }, + { "glGetNamedFramebufferAttachmentParameteriv", 45, -1 }, { "glCreateRenderbuffers", 45, -1 }, { "glNamedRenderbufferStorage", 45, -1 }, { "glNamedRenderbufferStorageMultisample", 45, -1 }, From df032ef7e04977112871edc1f1e82e35e6dbac76 Mon Sep 
17 00:00:00 2001 From: Laura Ekstrand Date: Mon, 2 Feb 2015 16:18:11 -0800 Subject: [PATCH 079/834] main: Fix whitespace in blit.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/blit.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c index 0694466eb75..da875e2310e 100644 --- a/src/mesa/main/blit.c +++ b/src/mesa/main/blit.c @@ -306,7 +306,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, * ignored." */ if ((readRb == NULL) || (drawRb == NULL)) { - mask &= ~GL_STENCIL_BUFFER_BIT; + mask &= ~GL_STENCIL_BUFFER_BIT; } else { int read_z_bits, draw_z_bits; @@ -360,7 +360,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, * ignored." */ if ((readRb == NULL) || (drawRb == NULL)) { - mask &= ~GL_DEPTH_BUFFER_BIT; + mask &= ~GL_DEPTH_BUFFER_BIT; } else { int read_s_bit, draw_s_bit; @@ -458,23 +458,23 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLuint i = 0; printf("glBlitFramebuffer(%d, %d, %d, %d, %d, %d, %d, %d," - " 0x%x, 0x%x)\n", - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask, filter); + " 0x%x, 0x%x)\n", + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); if (colorReadRb) { const struct gl_renderbuffer_attachment *att; att = find_attachment(readFb, colorReadRb); printf(" Src FBO %u RB %u (%dx%d) ", - readFb->Name, colorReadRb->Name, - colorReadRb->Width, colorReadRb->Height); + readFb->Name, colorReadRb->Name, + colorReadRb->Width, colorReadRb->Height); if (att && att->Texture) { printf("Tex %u tgt 0x%x level %u face %u", - att->Texture->Name, - att->Texture->Target, - att->TextureLevel, - att->CubeMapFace); + att->Texture->Name, + att->Texture->Target, + att->TextureLevel, + att->CubeMapFace); } printf("\n"); @@ 
-486,14 +486,14 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, att = find_attachment(drawFb, colorDrawRb); printf(" Dst FBO %u RB %u (%dx%d) ", - drawFb->Name, colorDrawRb->Name, - colorDrawRb->Width, colorDrawRb->Height); + drawFb->Name, colorDrawRb->Name, + colorDrawRb->Width, colorDrawRb->Height); if (att && att->Texture) { printf("Tex %u tgt 0x%x level %u face %u", - att->Texture->Name, - att->Texture->Target, - att->TextureLevel, - att->CubeMapFace); + att->Texture->Name, + att->Texture->Target, + att->TextureLevel, + att->CubeMapFace); } printf("\n"); } From 1a314f3c51bf1884cc3bf880e32461f304d5030b Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Mon, 2 Feb 2015 16:17:47 -0800 Subject: [PATCH 080/834] main: Refactor glBlitFramebuffer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/blit.c | 120 ++++++++++++++++++++++++------------------- src/mesa/main/blit.h | 7 +++ 2 files changed, 74 insertions(+), 53 deletions(-) diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c index da875e2310e..6c78686a59a 100644 --- a/src/mesa/main/blit.c +++ b/src/mesa/main/blit.c @@ -148,39 +148,24 @@ is_valid_blit_filter(const struct gl_context *ctx, GLenum filter) } -/** - * Blit rectangular region, optionally from one framebuffer to another. - * - * Note, if the src buffer is multisampled and the dest is not, this is - * when the samples must be resolved to a single color. 
- */ -void GLAPIENTRY -_mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, - GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, - GLbitfield mask, GLenum filter) +void +_mesa_blit_framebuffer(struct gl_context *ctx, + struct gl_framebuffer *readFb, + struct gl_framebuffer *drawFb, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter, const char *func) { const GLbitfield legalMaskBits = (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); - const struct gl_framebuffer *readFb, *drawFb; - GET_CURRENT_CONTEXT(ctx); FLUSH_VERTICES(ctx, 0); - if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, - "glBlitFramebuffer(%d, %d, %d, %d, %d, %d, %d, %d, 0x%x, %s)\n", - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask, _mesa_lookup_enum_by_nr(filter)); - if (ctx->NewState) { _mesa_update_state(ctx); } - readFb = ctx->ReadBuffer; - drawFb = ctx->DrawBuffer; - if (!readFb || !drawFb) { /* This will normally never happen but someday we may want to * support MakeCurrent() with no drawables. 
@@ -192,12 +177,12 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, if (drawFb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT || readFb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { _mesa_error(ctx, GL_INVALID_FRAMEBUFFER_OPERATION_EXT, - "glBlitFramebufferEXT(incomplete draw/read buffers)"); + "%s(incomplete draw/read buffers)", func); return; } if (!is_valid_blit_filter(ctx, filter)) { - _mesa_error(ctx, GL_INVALID_ENUM, "glBlitFramebufferEXT(%s)", + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid filter %s)", func, _mesa_lookup_enum_by_nr(filter)); return; } @@ -205,13 +190,13 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, if ((filter == GL_SCALED_RESOLVE_FASTEST_EXT || filter == GL_SCALED_RESOLVE_NICEST_EXT) && (readFb->Visual.samples == 0 || drawFb->Visual.samples > 0)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBlitFramebufferEXT(%s)", + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(%s: invalid samples)", func, _mesa_lookup_enum_by_nr(filter)); return; } if (mask & ~legalMaskBits) { - _mesa_error( ctx, GL_INVALID_VALUE, "glBlitFramebufferEXT(mask)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(invalid mask bits set)", func); return; } @@ -219,13 +204,13 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, if ((mask & (GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT)) && filter != GL_NEAREST) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(depth/stencil requires GL_NEAREST filter)"); + "%s(depth/stencil requires GL_NEAREST filter)", func); return; } /* get color read/draw renderbuffers */ if (mask & GL_COLOR_BUFFER_BIT) { - const GLuint numColorDrawBuffers = ctx->DrawBuffer->_NumColorDrawBuffers; + const GLuint numColorDrawBuffers = drawFb->_NumColorDrawBuffers; const struct gl_renderbuffer *colorReadRb = readFb->_ColorReadBuffer; const struct gl_renderbuffer *colorDrawRb = NULL; GLuint i; @@ -241,7 +226,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, 
} else { for (i = 0; i < numColorDrawBuffers; i++) { - colorDrawRb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + colorDrawRb = drawFb->_ColorDrawBuffers[i]; if (!colorDrawRb) continue; @@ -257,15 +242,15 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, */ if (_mesa_is_gles3(ctx) && (colorDrawRb == colorReadRb)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(source and destination color " - "buffer cannot be the same)"); + "%s(source and destination color " + "buffer cannot be the same)", func); return; } if (!compatible_color_datatypes(colorReadRb->Format, colorDrawRb->Format)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(color buffer datatypes mismatch)"); + "%s(color buffer datatypes mismatch)", func); return; } /* extra checks for multisample copies... */ @@ -273,7 +258,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, /* color formats must match */ if (!compatible_resolve_formats(colorReadRb, colorDrawRb)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(bad src/dst multisample pixel formats)"); + "%s(bad src/dst multisample pixel formats)", func); return; } } @@ -286,7 +271,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLenum type = _mesa_get_format_datatype(colorReadRb->Format); if (type == GL_INT || type == GL_UNSIGNED_INT) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(integer color type)"); + "%s(integer color type)", func); return; } } @@ -313,8 +298,8 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, if (_mesa_is_gles3(ctx) && (drawRb == readRb)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(source and destination stencil " - "buffer cannot be the same)"); + "%s(source and destination stencil " + "buffer cannot be the same)", func); return; } @@ -324,7 +309,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, * there is only one: 
GL_UNSIGNED_INT. */ _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(stencil attachment format mismatch)"); + "%s(stencil attachment format mismatch)", func); return; } @@ -340,8 +325,8 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, _mesa_get_format_datatype(readRb->Format) != _mesa_get_format_datatype(drawRb->Format))) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBlitFramebuffer" - "(stencil attachment depth format mismatch)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(stencil attachment depth format mismatch)", func); return; } } @@ -367,8 +352,8 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, if (_mesa_is_gles3(ctx) && (drawRb == readRb)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(source and destination depth " - "buffer cannot be the same)"); + "%s(source and destination depth " + "buffer cannot be the same)", func); return; } @@ -377,7 +362,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, (_mesa_get_format_datatype(readRb->Format) != _mesa_get_format_datatype(drawRb->Format))) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(depth attachment format mismatch)"); + "%s(depth attachment format mismatch)", func); return; } @@ -389,8 +374,8 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, * we should ignore the stencil format check. 
*/ if (read_s_bit > 0 && draw_s_bit > 0 && read_s_bit != draw_s_bit) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBlitFramebuffer" - "(depth attachment stencil bits mismatch)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(depth attachment stencil bits mismatch)", func); return; } } @@ -406,7 +391,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, */ if (drawFb->Visual.samples > 0) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(destination samples must be 0)"); + "%s(destination samples must be 0)", func); return; } @@ -426,7 +411,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, && (srcX0 != dstX0 || srcY0 != dstY0 || srcX1 != dstX1 || srcY1 != dstY1)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(bad src/dst multisample region)"); + "%s(bad src/dst multisample region)", func); return; } } else { @@ -434,7 +419,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, drawFb->Visual.samples > 0 && readFb->Visual.samples != drawFb->Visual.samples) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(mismatched samples)"); + "%s(mismatched samples)", func); return; } @@ -445,7 +430,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, if (abs(srcX1 - srcX0) != abs(dstX1 - dstX0) || abs(srcY1 - srcY0) != abs(dstY1 - dstY0)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(bad src/dst multisample region sizes)"); + "%s(bad src/dst multisample region sizes)", func); return; } } @@ -457,11 +442,12 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, const struct gl_renderbuffer *colorDrawRb = NULL; GLuint i = 0; - printf("glBlitFramebuffer(%d, %d, %d, %d, %d, %d, %d, %d," - " 0x%x, 0x%x)\n", + printf("%s(%d, %d, %d, %d, %d, %d, %d, %d," + " 0x%x, 0x%x)\n", func, srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter); + if (colorReadRb) { const struct 
gl_renderbuffer_attachment *att; @@ -479,8 +465,8 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, printf("\n"); /* Print all active color render buffers */ - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - colorDrawRb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + for (i = 0; i < drawFb->_NumColorDrawBuffers; i++) { + colorDrawRb = drawFb->_ColorDrawBuffers[i]; if (!colorDrawRb) continue; @@ -507,8 +493,36 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, } assert(ctx->Driver.BlitFramebuffer); - ctx->Driver.BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer, + ctx->Driver.BlitFramebuffer(ctx, readFb, drawFb, srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter); } + + +/** + * Blit rectangular region, optionally from one framebuffer to another. + * + * Note, if the src buffer is multisampled and the dest is not, this is + * when the samples must be resolved to a single color. + */ +void GLAPIENTRY +_mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, + "glBlitFramebuffer(%d, %d, %d, %d, " + " %d, %d, %d, %d, 0x%x, %s)\n", + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, _mesa_lookup_enum_by_nr(filter)); + + _mesa_blit_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter, "glBlitFramebuffer"); +} diff --git a/src/mesa/main/blit.h b/src/mesa/main/blit.h index 01a958af5a2..b854f5f84c6 100644 --- a/src/mesa/main/blit.h +++ b/src/mesa/main/blit.h @@ -28,6 +28,13 @@ #include "glheader.h" +extern void +_mesa_blit_framebuffer(struct gl_context *ctx, + struct gl_framebuffer *readFb, + struct gl_framebuffer *drawFb, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint 
dstX1, GLint dstY1, + GLbitfield mask, GLenum filter, const char *func); extern void GLAPIENTRY _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, From 9036a6c0aa0637a1cd756836006dc8565b15a478 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Tue, 3 Feb 2015 11:03:01 -0800 Subject: [PATCH 081/834] main: Refactor _mesa_update_framebuffer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _mesa_update_framebuffer now operates on arbitrary read and draw framebuffers. This allows BlitNamedFramebuffer to update the state of its arbitrary read and draw framebuffers. Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/drivers/dri/i915/i830_vtbl.c | 2 +- src/mesa/drivers/dri/i915/i915_vtbl.c | 2 +- src/mesa/drivers/dri/r200/r200_state.c | 2 +- src/mesa/drivers/dri/radeon/radeon_common.c | 2 +- src/mesa/drivers/dri/radeon/radeon_state.c | 2 +- src/mesa/main/framebuffer.c | 11 ++++------- src/mesa/main/framebuffer.h | 4 +++- src/mesa/main/state.c | 2 +- 8 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 91da977acee..4fff29ee88a 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -730,7 +730,7 @@ i830_update_draw_buffer(struct intel_context *intel) */ if (ctx->NewState & _NEW_BUFFERS) { /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ - _mesa_update_framebuffer(ctx); + _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ _mesa_update_draw_buffer_bounds(ctx); } diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 97bf81ed759..02ff312c2e6 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -732,7 +732,7 @@ i915_update_draw_buffer(struct intel_context *intel) */ if 
(ctx->NewState & _NEW_BUFFERS) { /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ - _mesa_update_framebuffer(ctx); + _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ _mesa_update_draw_buffer_bounds(ctx); } diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index b0a6bd573b6..de5296d8e2f 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -2215,7 +2215,7 @@ GLboolean r200ValidateState( struct gl_context *ctx ) GLuint new_state = rmesa->radeon.NewGLState; if (new_state & _NEW_BUFFERS) { - _mesa_update_framebuffer(ctx); + _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ _mesa_update_draw_buffer_bounds(ctx); diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 0ca526d2a02..12502d5f9a3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -220,7 +220,7 @@ void radeon_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb) */ if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ - _mesa_update_framebuffer(ctx); + _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ _mesa_update_draw_buffer_bounds(ctx); } diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index c45bb513dca..7bff1d46c05 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -1994,7 +1994,7 @@ GLboolean radeonValidateState( struct gl_context *ctx ) GLuint new_state = rmesa->radeon.NewGLState; if (new_state & _NEW_BUFFERS) { - _mesa_update_framebuffer(ctx); + 
_mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ _mesa_update_draw_buffer_bounds(ctx); RADEON_STATECHANGE(rmesa, ctx); diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 4f7736a64d0..74e5739e6fd 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -678,17 +678,14 @@ update_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) /** - * Update state related to the current draw/read framebuffers. + * Update state related to the draw/read framebuffers. */ void -_mesa_update_framebuffer(struct gl_context *ctx) +_mesa_update_framebuffer(struct gl_context *ctx, + struct gl_framebuffer *readFb, + struct gl_framebuffer *drawFb) { - struct gl_framebuffer *drawFb; - struct gl_framebuffer *readFb; - assert(ctx); - drawFb = ctx->DrawBuffer; - readFb = ctx->ReadBuffer; update_framebuffer(ctx, drawFb); if (readFb != drawFb) diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index a4274216ec2..d3b7e584ca4 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -84,7 +84,9 @@ _mesa_update_framebuffer_visual(struct gl_context *ctx, struct gl_framebuffer *fb); extern void -_mesa_update_framebuffer(struct gl_context *ctx); +_mesa_update_framebuffer(struct gl_context *ctx, + struct gl_framebuffer *readFb, + struct gl_framebuffer *drawFb); extern GLboolean _mesa_source_buffer_exists(struct gl_context *ctx, GLenum format); diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 99db37bafd7..3688736cb79 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -389,7 +389,7 @@ _mesa_update_state_locked( struct gl_context *ctx ) update_frontbit( ctx ); if (new_state & _NEW_BUFFERS) - _mesa_update_framebuffer(ctx); + _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT)) _mesa_update_draw_buffer_bounds( ctx ); From 
c1fe8d841c4746a6371dde569c6c88c6c2bad4e6 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Tue, 3 Feb 2015 11:42:17 -0800 Subject: [PATCH 082/834] main: Refactor _mesa_[update|get]_clamp_vertex_color. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/blend.c | 13 ++++++++----- src/mesa/main/blend.h | 7 +++++-- src/mesa/main/framebuffer.c | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c index 774fc888ec4..ee76b475943 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -769,7 +769,7 @@ _mesa_ClampColor(GLenum target, GLenum clamp) } FLUSH_VERTICES(ctx, _NEW_LIGHT); ctx->Light.ClampVertexColor = clamp; - _mesa_update_clamp_vertex_color(ctx); + _mesa_update_clamp_vertex_color(ctx, ctx->DrawBuffer); break; case GL_CLAMP_FRAGMENT_COLOR_ARB: if (ctx->API == API_OPENGL_CORE && @@ -814,9 +814,10 @@ _mesa_get_clamp_fragment_color(const struct gl_context *ctx) } GLboolean -_mesa_get_clamp_vertex_color(const struct gl_context *ctx) +_mesa_get_clamp_vertex_color(const struct gl_context *ctx, + const struct gl_framebuffer *drawFb) { - return get_clamp_color(ctx->DrawBuffer, ctx->Light.ClampVertexColor); + return get_clamp_color(drawFb, ctx->Light.ClampVertexColor); } GLboolean @@ -848,9 +849,11 @@ _mesa_update_clamp_fragment_color(struct gl_context *ctx) * Update the ctx->Color._ClampVertexColor field */ void -_mesa_update_clamp_vertex_color(struct gl_context *ctx) +_mesa_update_clamp_vertex_color(struct gl_context *ctx, + const struct gl_framebuffer *drawFb) { - ctx->Light._ClampVertexColor = _mesa_get_clamp_vertex_color(ctx); + ctx->Light._ClampVertexColor = + _mesa_get_clamp_vertex_color(ctx, drawFb); } /** diff --git a/src/mesa/main/blend.h b/src/mesa/main/blend.h index fe31a7440f0..9c1ef7e8592 100644 --- a/src/mesa/main/blend.h +++ b/src/mesa/main/blend.h @@ -37,6 +37,7 @@ 
#include "formats.h" struct gl_context; +struct gl_framebuffer; extern void GLAPIENTRY @@ -104,7 +105,8 @@ extern GLboolean _mesa_get_clamp_fragment_color(const struct gl_context *ctx); extern GLboolean -_mesa_get_clamp_vertex_color(const struct gl_context *ctx); +_mesa_get_clamp_vertex_color(const struct gl_context *ctx, + const struct gl_framebuffer *drawFb); extern GLboolean _mesa_get_clamp_read_color(const struct gl_context *ctx); @@ -113,7 +115,8 @@ extern void _mesa_update_clamp_fragment_color(struct gl_context *ctx); extern void -_mesa_update_clamp_vertex_color(struct gl_context *ctx); +_mesa_update_clamp_vertex_color(struct gl_context *ctx, + const struct gl_framebuffer *drawFb); extern mesa_format _mesa_get_render_format(const struct gl_context *ctx, mesa_format format); diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 74e5739e6fd..36a953988eb 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -691,7 +691,7 @@ _mesa_update_framebuffer(struct gl_context *ctx, if (readFb != drawFb) update_framebuffer(ctx, readFb); - _mesa_update_clamp_vertex_color(ctx); + _mesa_update_clamp_vertex_color(ctx, drawFb); _mesa_update_clamp_fragment_color(ctx); } From 2cabfd9636a55f6adee04f4199ba83a39598d51c Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Tue, 3 Mar 2015 14:03:33 -0800 Subject: [PATCH 083/834] main: Refactor _mesa_[update|get]_clamp_fragment_color. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/blend.c | 19 ++++++++++--------- src/mesa/main/blend.h | 6 ++++-- src/mesa/main/framebuffer.c | 2 +- src/mesa/main/get.c | 8 ++++---- src/mesa/main/texenv.c | 2 +- src/mesa/main/texparam.c | 2 +- src/mesa/program/prog_statevars.c | 4 ++-- 7 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c index ee76b475943..2342555a95d 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -778,7 +778,7 @@ _mesa_ClampColor(GLenum target, GLenum clamp) } FLUSH_VERTICES(ctx, _NEW_FRAG_CLAMP); ctx->Color.ClampFragmentColor = clamp; - _mesa_update_clamp_fragment_color(ctx); + _mesa_update_clamp_fragment_color(ctx, ctx->DrawBuffer); break; case GL_CLAMP_READ_COLOR_ARB: ctx->Color.ClampReadColor = clamp; @@ -807,10 +807,10 @@ get_clamp_color(const struct gl_framebuffer *fb, GLenum clamp) } GLboolean -_mesa_get_clamp_fragment_color(const struct gl_context *ctx) +_mesa_get_clamp_fragment_color(const struct gl_context *ctx, + const struct gl_framebuffer *drawFb) { - return get_clamp_color(ctx->DrawBuffer, - ctx->Color.ClampFragmentColor); + return get_clamp_color(drawFb, ctx->Color.ClampFragmentColor); } GLboolean @@ -830,19 +830,20 @@ _mesa_get_clamp_read_color(const struct gl_context *ctx) * Update the ctx->Color._ClampFragmentColor field */ void -_mesa_update_clamp_fragment_color(struct gl_context *ctx) +_mesa_update_clamp_fragment_color(struct gl_context *ctx, + const struct gl_framebuffer *drawFb) { - struct gl_framebuffer *fb = ctx->DrawBuffer; - /* Don't clamp if: * - there is no colorbuffer * - all colorbuffers are unsigned normalized, so clamping has no effect * - there is an integer colorbuffer */ - if (!fb || !fb->_HasSNormOrFloatColorBuffer || fb->_IntegerColor) + if (!drawFb || !drawFb->_HasSNormOrFloatColorBuffer || + drawFb->_IntegerColor) 
ctx->Color._ClampFragmentColor = GL_FALSE; else - ctx->Color._ClampFragmentColor = _mesa_get_clamp_fragment_color(ctx); + ctx->Color._ClampFragmentColor = + _mesa_get_clamp_fragment_color(ctx, drawFb); } /** diff --git a/src/mesa/main/blend.h b/src/mesa/main/blend.h index 9c1ef7e8592..58e66736ddc 100644 --- a/src/mesa/main/blend.h +++ b/src/mesa/main/blend.h @@ -102,7 +102,8 @@ extern void GLAPIENTRY _mesa_ClampColor(GLenum target, GLenum clamp); extern GLboolean -_mesa_get_clamp_fragment_color(const struct gl_context *ctx); +_mesa_get_clamp_fragment_color(const struct gl_context *ctx, + const struct gl_framebuffer *drawFb); extern GLboolean _mesa_get_clamp_vertex_color(const struct gl_context *ctx, @@ -112,7 +113,8 @@ extern GLboolean _mesa_get_clamp_read_color(const struct gl_context *ctx); extern void -_mesa_update_clamp_fragment_color(struct gl_context *ctx); +_mesa_update_clamp_fragment_color(struct gl_context *ctx, + const struct gl_framebuffer *drawFb); extern void _mesa_update_clamp_vertex_color(struct gl_context *ctx, diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 36a953988eb..5c409db2572 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -692,7 +692,7 @@ _mesa_update_framebuffer(struct gl_context *ctx, update_framebuffer(ctx, readFb); _mesa_update_clamp_vertex_color(ctx, drawFb); - _mesa_update_clamp_fragment_color(ctx); + _mesa_update_clamp_fragment_color(ctx, drawFb); } diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 09be715f911..8a6c81aff87 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -909,13 +909,13 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu break; case GL_FOG_COLOR: - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) COPY_4FV(v->value_float_4, ctx->Fog.Color); else COPY_4FV(v->value_float_4, ctx->Fog.ColorUnclamped); break; case GL_COLOR_CLEAR_VALUE: - if 
(_mesa_get_clamp_fragment_color(ctx)) { + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) { v->value_float_4[0] = CLAMP(ctx->Color.ClearColor.f[0], 0.0F, 1.0F); v->value_float_4[1] = CLAMP(ctx->Color.ClearColor.f[1], 0.0F, 1.0F); v->value_float_4[2] = CLAMP(ctx->Color.ClearColor.f[2], 0.0F, 1.0F); @@ -924,13 +924,13 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu COPY_4FV(v->value_float_4, ctx->Color.ClearColor.f); break; case GL_BLEND_COLOR_EXT: - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) COPY_4FV(v->value_float_4, ctx->Color.BlendColor); else COPY_4FV(v->value_float_4, ctx->Color.BlendColorUnclamped); break; case GL_ALPHA_TEST_REF: - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) v->value_float = ctx->Color.AlphaRef; else v->value_float = ctx->Color.AlphaRefUnclamped; diff --git a/src/mesa/main/texenv.c b/src/mesa/main/texenv.c index ec521e6c6e5..3edafc0f776 100644 --- a/src/mesa/main/texenv.c +++ b/src/mesa/main/texenv.c @@ -646,7 +646,7 @@ _mesa_GetTexEnvfv( GLenum target, GLenum pname, GLfloat *params ) if (pname == GL_TEXTURE_ENV_COLOR) { if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) _mesa_update_state(ctx); - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) COPY_4FV( params, texUnit->EnvColor ); else COPY_4FV( params, texUnit->EnvColorUnclamped ); diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index b5d42d3047f..d74134f41b1 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1709,7 +1709,7 @@ get_tex_parameterfv(struct gl_context *ctx, if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) _mesa_update_state_locked(ctx); - if (_mesa_get_clamp_fragment_color(ctx)) { + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) { params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F); params[1] = 
CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F); params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F); diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index 0c0c87faa28..bdb335e4ba3 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -244,14 +244,14 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[], { /* state[1] is the texture unit */ const GLuint unit = (GLuint) state[1]; - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) COPY_4V(value, ctx->Texture.Unit[unit].EnvColor); else COPY_4V(value, ctx->Texture.Unit[unit].EnvColorUnclamped); } return; case STATE_FOG_COLOR: - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) COPY_4V(value, ctx->Fog.Color); else COPY_4V(value, ctx->Fog.ColorUnclamped); From 39be0c5f6c1922e2e2f3c4243dd8257c2e7deef8 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Tue, 3 Mar 2015 15:05:16 -0800 Subject: [PATCH 084/834] main: Refactor _mesa_get_clamp_read_color. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This wasn't necessary for ARB_direct_state_access, but felt like a good idea for the sake of completeness. 
Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/blend.c | 5 +++-- src/mesa/main/blend.h | 3 ++- src/mesa/main/readpix.c | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c index 2342555a95d..d869fa2aa09 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -821,9 +821,10 @@ _mesa_get_clamp_vertex_color(const struct gl_context *ctx, } GLboolean -_mesa_get_clamp_read_color(const struct gl_context *ctx) +_mesa_get_clamp_read_color(const struct gl_context *ctx, + const struct gl_framebuffer *readFb) { - return get_clamp_color(ctx->ReadBuffer, ctx->Color.ClampReadColor); + return get_clamp_color(readFb, ctx->Color.ClampReadColor); } /** diff --git a/src/mesa/main/blend.h b/src/mesa/main/blend.h index 58e66736ddc..8ab9e02fc13 100644 --- a/src/mesa/main/blend.h +++ b/src/mesa/main/blend.h @@ -110,7 +110,8 @@ _mesa_get_clamp_vertex_color(const struct gl_context *ctx, const struct gl_framebuffer *drawFb); extern GLboolean -_mesa_get_clamp_read_color(const struct gl_context *ctx); +_mesa_get_clamp_read_color(const struct gl_context *ctx, + const struct gl_framebuffer *readFb); extern void _mesa_update_clamp_fragment_color(struct gl_context *ctx, diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index ed0104c9e46..df46f8361b9 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -83,7 +83,7 @@ get_readpixels_transfer_ops(const struct gl_context *ctx, mesa_format texFormat, if (uses_blit) { /* For blit-based ReadPixels packing, the clamping is done automatically * unless the type is float. 
*/ - if (_mesa_get_clamp_read_color(ctx) && + if (_mesa_get_clamp_read_color(ctx, ctx->ReadBuffer) && (type == GL_FLOAT || type == GL_HALF_FLOAT)) { transferOps |= IMAGE_CLAMP_BIT; } @@ -91,7 +91,7 @@ get_readpixels_transfer_ops(const struct gl_context *ctx, mesa_format texFormat, else { /* For CPU-based ReadPixels packing, the clamping must always be done * for non-float types, */ - if (_mesa_get_clamp_read_color(ctx) || + if (_mesa_get_clamp_read_color(ctx, ctx->ReadBuffer) || (type != GL_FLOAT && type != GL_HALF_FLOAT)) { transferOps |= IMAGE_CLAMP_BIT; } From b590c617256720e82fff0428a5e0e4a64115fac2 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Tue, 3 Feb 2015 14:41:26 -0800 Subject: [PATCH 085/834] main: Refactor _mesa_update_draw_buffer_bounds. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/drivers/dri/i915/i830_vtbl.c | 2 +- src/mesa/drivers/dri/i915/i915_vtbl.c | 2 +- src/mesa/drivers/dri/r200/r200_state.c | 2 +- src/mesa/drivers/dri/radeon/radeon_common.c | 2 +- src/mesa/drivers/dri/radeon/radeon_state.c | 2 +- src/mesa/main/framebuffer.c | 6 +++--- src/mesa/main/framebuffer.h | 3 ++- src/mesa/main/state.c | 2 +- 8 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 4fff29ee88a..8ed8ff555ba 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -732,7 +732,7 @@ i830_update_draw_buffer(struct intel_context *intel) /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); } if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c 
b/src/mesa/drivers/dri/i915/i915_vtbl.c index 02ff312c2e6..80bd249fa7b 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -734,7 +734,7 @@ i915_update_draw_buffer(struct intel_context *intel) /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); } if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index de5296d8e2f..6fe70b5c9d0 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -2217,7 +2217,7 @@ GLboolean r200ValidateState( struct gl_context *ctx ) if (new_state & _NEW_BUFFERS) { _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); R200_STATECHANGE(rmesa, ctx); } diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 12502d5f9a3..2a8bd6c9edc 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -222,7 +222,7 @@ void radeon_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb) /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); } if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index 7bff1d46c05..cba3d9c9689 100644 --- 
a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -1996,7 +1996,7 @@ GLboolean radeonValidateState( struct gl_context *ctx ) if (new_state & _NEW_BUFFERS) { _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); RADEON_STATECHANGE(rmesa, ctx); } diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 5c409db2572..0a0f73ba423 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -312,7 +312,7 @@ _mesa_resize_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb, if (ctx) { /* update scissor / window bounds */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); /* Signal new buffer state so that swrast will update its clipping * info (the CLIP_BIT flag). */ @@ -413,9 +413,9 @@ _mesa_scissor_bounding_box(const struct gl_context *ctx, * \param ctx the GL context. 
*/ void -_mesa_update_draw_buffer_bounds(struct gl_context *ctx) +_mesa_update_draw_buffer_bounds(struct gl_context *ctx, + struct gl_framebuffer *buffer) { - struct gl_framebuffer *buffer = ctx->DrawBuffer; int bbox[4]; if (!buffer) diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index d3b7e584ca4..d02b86f20d9 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -77,7 +77,8 @@ _mesa_scissor_bounding_box(const struct gl_context *ctx, unsigned idx, int *bbox); extern void -_mesa_update_draw_buffer_bounds(struct gl_context *ctx); +_mesa_update_draw_buffer_bounds(struct gl_context *ctx, + struct gl_framebuffer *drawFb); extern void _mesa_update_framebuffer_visual(struct gl_context *ctx, diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 3688736cb79..2657c532f88 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -392,7 +392,7 @@ _mesa_update_state_locked( struct gl_context *ctx ) _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT)) - _mesa_update_draw_buffer_bounds( ctx ); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); if (new_state & _NEW_LIGHT) _mesa_update_lighting( ctx ); From 47b910d27587f738dd81cdb9a879726785c0bc54 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Mon, 2 Feb 2015 16:27:46 -0800 Subject: [PATCH 086/834] main: Add entry point for BlitNamedFramebuffer. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 15 ++++++ src/mesa/main/blit.c | 51 +++++++++++++++++++ src/mesa/main/blit.h | 6 +++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 73 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index beb6249a6f7..7ee2000d86e 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -181,6 +181,21 @@ + + + + + + + + + + + + + + + diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c index 6c78686a59a..baf3165ff9d 100644 --- a/src/mesa/main/blit.c +++ b/src/mesa/main/blit.c @@ -526,3 +526,54 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter, "glBlitFramebuffer"); } + + +void GLAPIENTRY +_mesa_BlitNamedFramebuffer(GLuint readFramebuffer, GLuint drawFramebuffer, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *readFb, *drawFb; + + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, + "glBlitNamedFramebuffer(%u %u %d, %d, %d, %d, " + " %d, %d, %d, %d, 0x%x, %s)\n", + readFramebuffer, drawFramebuffer, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, _mesa_lookup_enum_by_nr(filter)); + + /* + * According to PDF page 533 of the OpenGL 4.5 core spec (30.10.2014, + * Section 18.3 Copying Pixels): + * "... if readFramebuffer or drawFramebuffer is zero (for + * BlitNamedFramebuffer), then the default read or draw framebuffer is + * used as the corresponding source or destination framebuffer, + * respectively." 
+ */ + if (readFramebuffer) { + readFb = _mesa_lookup_framebuffer_err(ctx, readFramebuffer, + "glBlitNamedFramebuffer"); + if (!readFb) + return; + } + else + readFb = ctx->WinSysReadBuffer; + + if (drawFramebuffer) { + drawFb = _mesa_lookup_framebuffer_err(ctx, drawFramebuffer, + "glBlitNamedFramebuffer"); + if (!drawFb) + return; + } + else + drawFb = ctx->WinSysDrawBuffer; + + _mesa_blit_framebuffer(ctx, readFb, drawFb, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter, "glBlitNamedFramebuffer"); +} diff --git a/src/mesa/main/blit.h b/src/mesa/main/blit.h index b854f5f84c6..54b946e3192 100644 --- a/src/mesa/main/blit.h +++ b/src/mesa/main/blit.h @@ -41,5 +41,11 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +extern void GLAPIENTRY +_mesa_BlitNamedFramebuffer(GLuint readFramebuffer, GLuint drawFramebuffer, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter); + #endif /* BLIT_H */ diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index f3aff5f9fa8..ec3c9927162 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -984,6 +984,7 @@ const struct function gl_core_functions_possible[] = { { "glNamedFramebufferRenderbuffer", 45, -1 }, { "glNamedFramebufferTexture", 45, -1 }, { "glNamedFramebufferTextureLayer", 45, -1 }, + { "glBlitNamedFramebuffer", 45, -1 }, { "glCheckNamedFramebufferStatus", 45, -1 }, { "glGetNamedFramebufferAttachmentParameteriv", 45, -1 }, { "glCreateRenderbuffers", 45, -1 }, From 6b284f08ab399154ad10e2166440b44cbbdcb2c5 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Tue, 3 Feb 2015 14:47:00 -0800 Subject: [PATCH 087/834] main: _mesa_blit_framebuffer updates its arbitrary framebuffers. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, we used _mesa_update_state to update the currently bound framebuffers prior to performing a blit. Now that _mesa_blit_framebuffer uses arbitrary framebuffers, _mesa_update_state is not specific enough. Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/blit.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c index baf3165ff9d..db8fee5a414 100644 --- a/src/mesa/main/blit.c +++ b/src/mesa/main/blit.c @@ -34,6 +34,7 @@ #include "enums.h" #include "blit.h" #include "fbobject.h" +#include "framebuffer.h" #include "glformats.h" #include "mtypes.h" #include "state.h" @@ -162,9 +163,11 @@ _mesa_blit_framebuffer(struct gl_context *ctx, FLUSH_VERTICES(ctx, 0); - if (ctx->NewState) { - _mesa_update_state(ctx); - } + /* Update completeness status of readFb and drawFb. */ + _mesa_update_framebuffer(ctx, readFb, drawFb); + + /* Make sure drawFb has an initialized bounding box. */ + _mesa_update_draw_buffer_bounds(ctx, drawFb); if (!readFb || !drawFb) { /* This will normally never happen but someday we may want to From b4368ac09db75cea412121ada6c12af1414feb50 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Wed, 4 Feb 2015 14:21:17 -0800 Subject: [PATCH 088/834] main: Complete error conditions for glInvalidate*Framebuffer. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Fredrik Höglund Reviewed-by: Adam Jackson Cc: "10.4 10.5" --- src/mesa/main/fbobject.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 15878d327c6..7ba72558830 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -3650,12 +3650,29 @@ invalidate_framebuffer_storage(GLenum target, GLsizei numAttachments, return; } + /* Section 17.4 Whole Framebuffer Operations of the OpenGL 4.5 Core + * Spec (2.2.2015, PDF page 522) says: + * "An INVALID_VALUE error is generated if numAttachments, width, or + * height is negative." + */ if (numAttachments < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(numAttachments < 0)", name); return; } + if (width < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(width < 0)", name); + return; + } + + if (height < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(height < 0)", name); + return; + } + /* The GL_ARB_invalidate_subdata spec says: * * "If an attachment is specified that does not exist in the @@ -3748,7 +3765,8 @@ invalidate_framebuffer_storage(GLenum target, GLsizei numAttachments, return; invalid_enum: - _mesa_error(ctx, GL_INVALID_ENUM, "%s(attachment)", name); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", name, + _mesa_lookup_enum_by_nr(attachments[i])); return; } From 65d4a20f1c1923498220637f9474d2ae399df52d Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Wed, 4 Feb 2015 09:49:58 -0800 Subject: [PATCH 089/834] main: Refactor invalidate_framebuffer_storage. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/fbobject.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 7ba72558830..4a32f1f568b 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -3636,19 +3636,13 @@ _mesa_GetNamedFramebufferAttachmentParameteriv(GLuint framebuffer, static void -invalidate_framebuffer_storage(GLenum target, GLsizei numAttachments, +invalidate_framebuffer_storage(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height, const char *name) { int i; - struct gl_framebuffer *fb; - GET_CURRENT_CONTEXT(ctx); - - fb = get_framebuffer_target(ctx, target); - if (!fb) { - _mesa_error(ctx, GL_INVALID_ENUM, "%s(target)", name); - return; - } /* Section 17.4 Whole Framebuffer Operations of the OpenGL 4.5 Core * Spec (2.2.2015, PDF page 522) says: @@ -3776,7 +3770,18 @@ _mesa_InvalidateSubFramebuffer(GLenum target, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height) { - invalidate_framebuffer_storage(target, numAttachments, attachments, + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glInvalidateSubFramebuffer(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + + invalidate_framebuffer_storage(ctx, fb, numAttachments, attachments, x, y, width, height, "glInvalidateSubFramebuffer"); } @@ -3786,6 +3791,17 @@ void GLAPIENTRY _mesa_InvalidateFramebuffer(GLenum target, GLsizei numAttachments, const GLenum *attachments) { + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + fb = get_framebuffer_target(ctx, target); + if (!fb) { + 
_mesa_error(ctx, GL_INVALID_ENUM, + "glInvalidateFramebuffer(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + /* The GL_ARB_invalidate_subdata spec says: * * "The command @@ -3798,7 +3814,7 @@ _mesa_InvalidateFramebuffer(GLenum target, GLsizei numAttachments, * <width>, <height> equal to 0, 0, <MAX_VIEWPORT_WIDTH>, * <MAX_VIEWPORT_HEIGHT> respectively." */ - invalidate_framebuffer_storage(target, numAttachments, attachments, + invalidate_framebuffer_storage(ctx, fb, numAttachments, attachments, 0, 0, MAX_VIEWPORT_WIDTH, MAX_VIEWPORT_HEIGHT, "glInvalidateFramebuffer"); From d890fc710f6d3472ea3837e074fafc100d41e01f Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Wed, 4 Feb 2015 14:21:48 -0800 Subject: [PATCH 090/834] main: Add entry points for InvalidateNamedFramebuffer[Sub]Data. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 16 +++++ src/mesa/main/fbobject.c | 69 +++++++++++++++++++ src/mesa/main/fbobject.h | 12 ++++ src/mesa/main/tests/dispatch_sanity.cpp | 2 + 4 files changed, 99 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 7ee2000d86e..de7742088e1 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -181,6 +181,22 @@ + + + + + + + + + + + + + + + + diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 4a32f1f568b..dc1e1a67321 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -3787,6 +3787,35 @@ _mesa_InvalidateSubFramebuffer(GLenum target, GLsizei numAttachments, } +void GLAPIENTRY +_mesa_InvalidateNamedFramebufferSubData(GLuint framebuffer, + GLsizei numAttachments, + const GLenum *attachments, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + /* The OpenGL 4.5 core spec (02.02.2015) says (in 
Section 17.4 Whole + * Framebuffer Operations, PDF page 522): "If framebuffer is zero, the + * default draw framebuffer is affected." + */ + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glInvalidateNamedFramebufferSubData"); + if (!fb) + return; + } + else + fb = ctx->WinSysDrawBuffer; + + invalidate_framebuffer_storage(ctx, fb, numAttachments, attachments, + x, y, width, height, + "glInvalidateNamedFramebufferSubData"); +} + + void GLAPIENTRY _mesa_InvalidateFramebuffer(GLenum target, GLsizei numAttachments, const GLenum *attachments) @@ -3821,6 +3850,46 @@ _mesa_InvalidateFramebuffer(GLenum target, GLsizei numAttachments, } +void GLAPIENTRY +_mesa_InvalidateNamedFramebufferData(GLuint framebuffer, + GLsizei numAttachments, + const GLenum *attachments) +{ + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + /* The OpenGL 4.5 core spec (02.02.2015) says (in Section 17.4 Whole + * Framebuffer Operations, PDF page 522): "If framebuffer is zero, the + * default draw framebuffer is affected." + */ + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glInvalidateNamedFramebufferData"); + if (!fb) + return; + } + else + fb = ctx->WinSysDrawBuffer; + + /* The GL_ARB_invalidate_subdata spec says: + * + * "The command + * + * void InvalidateFramebuffer(enum target, + * sizei numAttachments, + * const enum *attachments); + * + * is equivalent to the command InvalidateSubFramebuffer with <x>, <y>, + * <width>, <height> equal to 0, 0, <MAX_VIEWPORT_WIDTH>, + * <MAX_VIEWPORT_HEIGHT> respectively." 
+ */ + invalidate_framebuffer_storage(ctx, fb, numAttachments, attachments, + 0, 0, + MAX_VIEWPORT_WIDTH, MAX_VIEWPORT_HEIGHT, + "glInvalidateNamedFramebufferData"); +} + + void GLAPIENTRY _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments, const GLenum *attachments) diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 71392037119..22cb139ec9a 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -260,10 +260,22 @@ _mesa_InvalidateSubFramebuffer(GLenum target, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height); +extern void GLAPIENTRY +_mesa_InvalidateNamedFramebufferSubData(GLuint framebuffer, + GLsizei numAttachments, + const GLenum *attachments, + GLint x, GLint y, + GLsizei width, GLsizei height); + extern void GLAPIENTRY _mesa_InvalidateFramebuffer(GLenum target, GLsizei numAttachments, const GLenum *attachments); +extern void GLAPIENTRY +_mesa_InvalidateNamedFramebufferData(GLuint framebuffer, + GLsizei numAttachments, + const GLenum *attachments); + extern void GLAPIENTRY _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments, const GLenum *attachments); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index ec3c9927162..e375260ff1f 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -984,6 +984,8 @@ const struct function gl_core_functions_possible[] = { { "glNamedFramebufferRenderbuffer", 45, -1 }, { "glNamedFramebufferTexture", 45, -1 }, { "glNamedFramebufferTextureLayer", 45, -1 }, + { "glInvalidateNamedFramebufferSubData", 45, -1 }, + { "glInvalidateNamedFramebufferData", 45, -1 }, { "glBlitNamedFramebuffer", 45, -1 }, { "glCheckNamedFramebufferStatus", 45, -1 }, { "glGetNamedFramebufferAttachmentParameteriv", 45, -1 }, From 6236c477990d67499f494b3c95844217fbd9a3dd Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Thu, 5 Feb 2015 13:24:43 -0800 
Subject: [PATCH 091/834] main: Fake entry point for glClearNamedFramebufferiv. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa's ClearBuffer framework is very complicated and thoroughly married to the object binding model. Moreover, the OpenGL spec for ClearBuffer is also very complicated. At some point, we should implement buffer clearing for arbitrary framebuffer objects, but for now, we will just wrap ClearBuffer. Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 7 +++++++ src/mesa/main/clear.c | 20 +++++++++++++++++++ src/mesa/main/clear.h | 4 ++++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 32 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index de7742088e1..c793c9e9c70 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -197,6 +197,13 @@ + + + + + + + diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c index 8d707bc34a1..4e029cf258f 100644 --- a/src/mesa/main/clear.c +++ b/src/mesa/main/clear.c @@ -34,6 +34,8 @@ #include "clear.h" #include "context.h" #include "enums.h" +#include "fbobject.h" +#include "get.h" #include "macros.h" #include "mtypes.h" #include "state.h" @@ -399,6 +401,24 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value) } +/** + * The ClearBuffer framework is so complicated and so riddled with the + * assumption that the framebuffer is bound that, for now, we will just fake + * direct state access clearing for the user. 
+ */ +void GLAPIENTRY +_mesa_ClearNamedFramebufferiv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLint *value) +{ + GLint oldfb; + + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + _mesa_ClearBufferiv(buffer, drawbuffer, value); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, (GLuint) oldfb); +} + + /** * New in GL 3.0 * Clear unsigned integer color buffer (not depth, not stencil). diff --git a/src/mesa/main/clear.h b/src/mesa/main/clear.h index 96ce47b929e..d0b61335356 100644 --- a/src/mesa/main/clear.h +++ b/src/mesa/main/clear.h @@ -51,6 +51,10 @@ _mesa_Clear( GLbitfield mask ); extern void GLAPIENTRY _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value); +extern void GLAPIENTRY +_mesa_ClearNamedFramebufferiv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLint *value); + extern void GLAPIENTRY _mesa_ClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint *value); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index e375260ff1f..149c8b456b2 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -986,6 +986,7 @@ const struct function gl_core_functions_possible[] = { { "glNamedFramebufferTextureLayer", 45, -1 }, { "glInvalidateNamedFramebufferSubData", 45, -1 }, { "glInvalidateNamedFramebufferData", 45, -1 }, + { "glClearNamedFramebufferiv", 45, -1 }, { "glBlitNamedFramebuffer", 45, -1 }, { "glCheckNamedFramebufferStatus", 45, -1 }, { "glGetNamedFramebufferAttachmentParameteriv", 45, -1 }, From 43db4b8465c203f9748cd2a7e08d8242573116f1 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Thu, 5 Feb 2015 13:30:50 -0800 Subject: [PATCH 092/834] main: Fake entry point for glClearNamedFramebufferuiv. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa's ClearBuffer framework is very complicated and thoroughly married to the object binding model. Moreover, the OpenGL spec for ClearBuffer is also very complicated. At some point, we should implement buffer clearing for arbitrary framebuffer objects, but for now, we will just wrap ClearBuffer. Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mapi/glapi/gen/ARB_direct_state_access.xml | 7 +++++++ src/mesa/main/clear.c | 18 ++++++++++++++++++ src/mesa/main/clear.h | 4 ++++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 30 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index c793c9e9c70..95fda9676fd 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -204,6 +204,13 @@ + + + + + + + diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c index 4e029cf258f..05bbf1564be 100644 --- a/src/mesa/main/clear.c +++ b/src/mesa/main/clear.c @@ -491,6 +491,24 @@ _mesa_ClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint *value) } +/** + * The ClearBuffer framework is so complicated and so riddled with the + * assumption that the framebuffer is bound that, for now, we will just fake + * direct state access clearing for the user. + */ +void GLAPIENTRY +_mesa_ClearNamedFramebufferuiv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLuint *value) +{ + GLint oldfb; + + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + _mesa_ClearBufferuiv(buffer, drawbuffer, value); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, (GLuint) oldfb); +} + + /** * New in GL 3.0 * Clear fixed-pt or float color buffer or depth buffer (not stencil). 
diff --git a/src/mesa/main/clear.h b/src/mesa/main/clear.h index d0b61335356..b74c2275cb0 100644 --- a/src/mesa/main/clear.h +++ b/src/mesa/main/clear.h @@ -58,6 +58,10 @@ _mesa_ClearNamedFramebufferiv(GLuint framebuffer, GLenum buffer, extern void GLAPIENTRY _mesa_ClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint *value); +extern void GLAPIENTRY +_mesa_ClearNamedFramebufferuiv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLuint *value); + extern void GLAPIENTRY _mesa_ClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat *value); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 149c8b456b2..60d0fe9a118 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -987,6 +987,7 @@ const struct function gl_core_functions_possible[] = { { "glInvalidateNamedFramebufferSubData", 45, -1 }, { "glInvalidateNamedFramebufferData", 45, -1 }, { "glClearNamedFramebufferiv", 45, -1 }, + { "glClearNamedFramebufferuiv", 45, -1 }, { "glBlitNamedFramebuffer", 45, -1 }, { "glCheckNamedFramebufferStatus", 45, -1 }, { "glGetNamedFramebufferAttachmentParameteriv", 45, -1 }, From bbd9c55d0217c697cbe090057bfbf830b551ed36 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Thu, 5 Feb 2015 13:38:39 -0800 Subject: [PATCH 093/834] main: Fake entry point for glClearNamedFramebufferfv. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa's ClearBuffer framework is very complicated and thoroughly married to the object binding model. Moreover, the OpenGL spec for ClearBuffer is also very complicated. At some point, we should implement buffer clearing for arbitrary framebuffer objects, but for now, we will just wrap ClearBuffer. 
Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mapi/glapi/gen/ARB_direct_state_access.xml | 7 +++++++ src/mesa/main/clear.c | 18 ++++++++++++++++++ src/mesa/main/clear.h | 4 ++++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 30 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 95fda9676fd..2bb3c716087 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -211,6 +211,13 @@ + + + + + + + diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c index 05bbf1564be..81e255e5ee7 100644 --- a/src/mesa/main/clear.c +++ b/src/mesa/main/clear.c @@ -602,6 +602,24 @@ _mesa_ClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat *value) } +/** + * The ClearBuffer framework is so complicated and so riddled with the + * assumption that the framebuffer is bound that, for now, we will just fake + * direct state access clearing for the user. + */ +void GLAPIENTRY +_mesa_ClearNamedFramebufferfv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLfloat *value) +{ + GLint oldfb; + + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + _mesa_ClearBufferfv(buffer, drawbuffer, value); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, (GLuint) oldfb); +} + + /** * New in GL 3.0 * Clear depth/stencil buffer only. 
diff --git a/src/mesa/main/clear.h b/src/mesa/main/clear.h index b74c2275cb0..582159fea0f 100644 --- a/src/mesa/main/clear.h +++ b/src/mesa/main/clear.h @@ -65,6 +65,10 @@ _mesa_ClearNamedFramebufferuiv(GLuint framebuffer, GLenum buffer, extern void GLAPIENTRY _mesa_ClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat *value); +extern void GLAPIENTRY +_mesa_ClearNamedFramebufferfv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLfloat *value); + extern void GLAPIENTRY _mesa_ClearBufferfi(GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 60d0fe9a118..1f25869d1ee 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -988,6 +988,7 @@ const struct function gl_core_functions_possible[] = { { "glInvalidateNamedFramebufferData", 45, -1 }, { "glClearNamedFramebufferiv", 45, -1 }, { "glClearNamedFramebufferuiv", 45, -1 }, + { "glClearNamedFramebufferfv", 45, -1 }, { "glBlitNamedFramebuffer", 45, -1 }, { "glCheckNamedFramebufferStatus", 45, -1 }, { "glGetNamedFramebufferAttachmentParameteriv", 45, -1 }, From a0329c7b40de3db22d22c74793a7c268e8904e53 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Thu, 5 Feb 2015 13:43:12 -0800 Subject: [PATCH 094/834] main: Fake entry point for glClearNamedFramebufferfi. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa's ClearBuffer framework is very complicated and thoroughly married to the object binding model. Moreover, the OpenGL spec for ClearBuffer is also very complicated. At some point, we should implement buffer clearing for arbitrary framebuffer objects, but for now, we will just wrap ClearBuffer. 
Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mapi/glapi/gen/ARB_direct_state_access.xml | 7 +++++++ src/mesa/main/clear.c | 18 ++++++++++++++++++ src/mesa/main/clear.h | 4 ++++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 30 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 2bb3c716087..8bafe3f0bb0 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -218,6 +218,13 @@ + + + + + + + diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c index 81e255e5ee7..426caea4709 100644 --- a/src/mesa/main/clear.c +++ b/src/mesa/main/clear.c @@ -682,3 +682,21 @@ _mesa_ClearBufferfi(GLenum buffer, GLint drawbuffer, ctx->Stencil.Clear = clearStencilSave; } } + + +/** + * The ClearBuffer framework is so complicated and so riddled with the + * assumption that the framebuffer is bound that, for now, we will just fake + * direct state access clearing for the user. 
+ */ +void GLAPIENTRY +_mesa_ClearNamedFramebufferfi(GLuint framebuffer, GLenum buffer, + GLfloat depth, GLint stencil) +{ + GLint oldfb; + + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + _mesa_ClearBufferfi(buffer, 0, depth, stencil); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, (GLuint) oldfb); +} diff --git a/src/mesa/main/clear.h b/src/mesa/main/clear.h index 582159fea0f..c29850676ca 100644 --- a/src/mesa/main/clear.h +++ b/src/mesa/main/clear.h @@ -73,4 +73,8 @@ extern void GLAPIENTRY _mesa_ClearBufferfi(GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +extern void GLAPIENTRY +_mesa_ClearNamedFramebufferfi(GLuint framebuffer, GLenum buffer, + GLfloat depth, GLint stencil); + #endif diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 1f25869d1ee..a119d0874a6 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -989,6 +989,7 @@ const struct function gl_core_functions_possible[] = { { "glClearNamedFramebufferiv", 45, -1 }, { "glClearNamedFramebufferuiv", 45, -1 }, { "glClearNamedFramebufferfv", 45, -1 }, + { "glClearNamedFramebufferfi", 45, -1 }, { "glBlitNamedFramebuffer", 45, -1 }, { "glCheckNamedFramebufferStatus", 45, -1 }, { "glGetNamedFramebufferAttachmentParameteriv", 45, -1 }, From 9f1db78a83feebefb9e1ef889b3b6b0532482c14 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Thu, 5 Feb 2015 16:38:11 -0800 Subject: [PATCH 095/834] main: Add stubs for [Get]NamedFramebufferParameteri[v]. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ARB_direct_state_access specification says (as of 2015.02.05): "Interactions with OpenGL 4.3 or ARB_framebuffer_no_attachments If neither OpenGL 4.3 nor ARB_framebuffer_no_attachments are supported, ignore the support for NamedFramebufferParameteri and GetNamedFramebufferParameteriv." 
This commit adds stubs for these entry points. Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 12 +++++++ src/mesa/main/fbobject.c | 32 +++++++++++++++++++ src/mesa/main/fbobject.h | 8 +++++ src/mesa/main/tests/dispatch_sanity.cpp | 2 ++ 4 files changed, 54 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 8bafe3f0bb0..3617e48a19c 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -166,6 +166,12 @@ + + + + + + @@ -246,6 +252,12 @@ + + + + + + diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index dc1e1a67321..971dc688a04 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -3635,6 +3635,38 @@ _mesa_GetNamedFramebufferAttachmentParameteriv(GLuint framebuffer, } +void GLAPIENTRY +_mesa_NamedFramebufferParameteri(GLuint framebuffer, GLenum pname, + GLint param) +{ + GET_CURRENT_CONTEXT(ctx); + + (void) framebuffer; + (void) pname; + (void) param; + + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedFramebufferParameteri not supported " + "(ARB_framebuffer_no_attachments not implemented)"); +} + + +void GLAPIENTRY +_mesa_GetNamedFramebufferParameteriv(GLuint framebuffer, GLenum pname, + GLint *param) +{ + GET_CURRENT_CONTEXT(ctx); + + (void) framebuffer; + (void) pname; + (void) param; + + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetNamedFramebufferParameteriv not supported " + "(ARB_framebuffer_no_attachments not implemented)"); +} + + static void invalidate_framebuffer_storage(struct gl_context *ctx, struct gl_framebuffer *fb, diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 22cb139ec9a..9f570db3a26 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -255,6 +255,14 @@ _mesa_GetNamedFramebufferAttachmentParameteriv(GLuint framebuffer, GLenum attachment, GLenum pname, GLint *params); 
+extern void GLAPIENTRY +_mesa_NamedFramebufferParameteri(GLuint framebuffer, GLenum pname, + GLint param); + +extern void GLAPIENTRY +_mesa_GetNamedFramebufferParameteriv(GLuint framebuffer, GLenum pname, + GLint *param); + extern void GLAPIENTRY _mesa_InvalidateSubFramebuffer(GLenum target, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index a119d0874a6..23ddabd169d 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -982,6 +982,7 @@ const struct function gl_core_functions_possible[] = { { "glGetNamedBufferSubData", 45, -1 }, { "glCreateFramebuffers", 45, -1 }, { "glNamedFramebufferRenderbuffer", 45, -1 }, + { "glNamedFramebufferParameteri", 45, -1 }, { "glNamedFramebufferTexture", 45, -1 }, { "glNamedFramebufferTextureLayer", 45, -1 }, { "glInvalidateNamedFramebufferSubData", 45, -1 }, @@ -992,6 +993,7 @@ const struct function gl_core_functions_possible[] = { { "glClearNamedFramebufferfi", 45, -1 }, { "glBlitNamedFramebuffer", 45, -1 }, { "glCheckNamedFramebufferStatus", 45, -1 }, + { "glGetNamedFramebufferParameteriv", 45, -1 }, { "glGetNamedFramebufferAttachmentParameteriv", 45, -1 }, { "glCreateRenderbuffers", 45, -1 }, { "glNamedRenderbufferStorage", 45, -1 }, From f8fd8dfee8647a3bd2215fce49626953ecdbdbec Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Fri, 6 Feb 2015 10:34:24 -0800 Subject: [PATCH 096/834] main: Refactor _mesa_drawbuffers. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Fredrik: Whitespace fix] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/drivers/common/meta.c | 3 ++- src/mesa/main/buffers.c | 26 ++++++++++++-------------- src/mesa/main/buffers.h | 4 +++- src/mesa/main/context.c | 3 ++- src/mesa/main/framebuffer.c | 2 +- 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index d2ab7b8ded9..214a68a9129 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -1211,7 +1211,8 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_BindRenderbuffer(GL_RENDERBUFFER, save->RenderbufferName); if (state & MESA_META_DRAW_BUFFERS) { - _mesa_drawbuffers(ctx, ctx->Const.MaxDrawBuffers, save->ColorDrawBuffers, NULL); + _mesa_drawbuffers(ctx, ctx->DrawBuffer, ctx->Const.MaxDrawBuffers, + save->ColorDrawBuffers, NULL); } ctx->Meta->SaveStackDepth--; diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 37a9790923b..a338013fed5 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -277,7 +277,7 @@ _mesa_DrawBuffer(GLenum buffer) } /* if we get here, there's no error so set new state */ - _mesa_drawbuffers(ctx, 1, &buffer, &destMask); + _mesa_drawbuffers(ctx, ctx->DrawBuffer, 1, &buffer, &destMask); /* * Call device driver function. @@ -440,7 +440,7 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) } /* OK, if we get here, there were no errors so set the new state */ - _mesa_drawbuffers(ctx, n, buffers, destMask); + _mesa_drawbuffers(ctx, ctx->DrawBuffer, n, buffers, destMask); /* * Call device driver function. Note that n can be equal to 0, @@ -459,13 +459,11 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * actual change. 
*/ static void -updated_drawbuffers(struct gl_context *ctx) +updated_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb) { FLUSH_VERTICES(ctx, _NEW_BUFFERS); if (ctx->API == API_OPENGL_COMPAT && !ctx->Extensions.ARB_ES2_compatibility) { - struct gl_framebuffer *fb = ctx->DrawBuffer; - /* Flag the FBO as requiring validation. */ if (_mesa_is_user_fbo(fb)) { fb->_Status = 0; @@ -482,6 +480,7 @@ updated_drawbuffers(struct gl_context *ctx) * so nothing should go wrong at this point. * * \param ctx current context + * \param fb the desired draw buffer * \param n number of color outputs to set * \param buffers array[n] of colorbuffer names, like GL_LEFT. * \param destMask array[n] of BUFFER_BIT_* bitmasks which correspond to the @@ -489,10 +488,9 @@ updated_drawbuffers(struct gl_context *ctx) * BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT). */ void -_mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, - const GLbitfield *destMask) +_mesa_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb, + GLuint n, const GLenum *buffers, const GLbitfield *destMask) { - struct gl_framebuffer *fb = ctx->DrawBuffer; GLbitfield mask[MAX_DRAW_BUFFERS]; GLuint buf; @@ -518,7 +516,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, while (destMask0) { GLint bufIndex = ffs(destMask0) - 1; if (fb->_ColorDrawBufferIndexes[count] != bufIndex) { - updated_drawbuffers(ctx); + updated_drawbuffers(ctx, fb); fb->_ColorDrawBufferIndexes[count] = bufIndex; } count++; @@ -535,14 +533,14 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, /* only one bit should be set in the destMask[buf] field */ assert(_mesa_bitcount(destMask[buf]) == 1); if (fb->_ColorDrawBufferIndexes[buf] != bufIndex) { - updated_drawbuffers(ctx); + updated_drawbuffers(ctx, fb); fb->_ColorDrawBufferIndexes[buf] = bufIndex; } count = buf + 1; } else { if (fb->_ColorDrawBufferIndexes[buf] != -1) { - updated_drawbuffers(ctx); + 
updated_drawbuffers(ctx, fb); fb->_ColorDrawBufferIndexes[buf] = -1; } } @@ -554,7 +552,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, /* set remaining outputs to -1 (GL_NONE) */ for (buf = fb->_NumColorDrawBuffers; buf < ctx->Const.MaxDrawBuffers; buf++) { if (fb->_ColorDrawBufferIndexes[buf] != -1) { - updated_drawbuffers(ctx); + updated_drawbuffers(ctx, fb); fb->_ColorDrawBufferIndexes[buf] = -1; } } @@ -566,7 +564,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, /* also set context drawbuffer state */ for (buf = 0; buf < ctx->Const.MaxDrawBuffers; buf++) { if (ctx->Color.DrawBuffer[buf] != fb->ColorDrawBuffer[buf]) { - updated_drawbuffers(ctx); + updated_drawbuffers(ctx, fb); ctx->Color.DrawBuffer[buf] = fb->ColorDrawBuffer[buf]; } } @@ -585,7 +583,7 @@ _mesa_update_draw_buffers(struct gl_context *ctx) /* should be a window system FBO */ assert(_mesa_is_winsys_fbo(ctx->DrawBuffer)); - _mesa_drawbuffers(ctx, ctx->Const.MaxDrawBuffers, + _mesa_drawbuffers(ctx, ctx->DrawBuffer, ctx->Const.MaxDrawBuffers, ctx->Color.DrawBuffer, NULL); } diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index ebcfa1c1e74..bc6d74a867e 100644 --- a/src/mesa/main/buffers.h +++ b/src/mesa/main/buffers.h @@ -36,6 +36,7 @@ #include "glheader.h" struct gl_context; +struct gl_framebuffer; extern void GLAPIENTRY _mesa_DrawBuffer( GLenum mode ); @@ -44,7 +45,8 @@ extern void GLAPIENTRY _mesa_DrawBuffers(GLsizei n, const GLenum *buffers); extern void -_mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, +_mesa_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb, + GLuint n, const GLenum *buffers, const GLbitfield *destMask); extern void diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 0a192de8c0a..279716a83d5 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1565,7 +1565,8 @@ handle_first_current(struct gl_context *ctx) else buffer = GL_FRONT; - 
_mesa_drawbuffers(ctx, 1, &buffer, NULL /* destMask */); + _mesa_drawbuffers(ctx, ctx->DrawBuffer, 1, &buffer, + NULL /* destMask */); } if (ctx->ReadBuffer != _mesa_get_incomplete_framebuffer()) { diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 0a0f73ba423..665a5ba1492 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -652,7 +652,7 @@ update_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) * context state (GL_READ_BUFFER too). */ if (fb->ColorDrawBuffer[0] != ctx->Color.DrawBuffer[0]) { - _mesa_drawbuffers(ctx, ctx->Const.MaxDrawBuffers, + _mesa_drawbuffers(ctx, fb, ctx->Const.MaxDrawBuffers, ctx->Color.DrawBuffer, NULL); } } From 2f32e4847d154a9d16869cf03ce66f719d37ec4a Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Fri, 6 Feb 2015 10:57:57 -0800 Subject: [PATCH 097/834] main: Refactor _mesa_DrawBuffer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This could have added a new DD table entry for DrawBuffer that takes an arbitrary draw buffer, but, after looking at the existing DD functions, Kenneth Graunke recommended that we just skip calling the DD functions in the case of ARB_direct_state_access. The DD implementations for DrawBuffer(s) have limited functionality, especially with respect to ARB_direct_state_access. [Fredrik: Call the driver function when fb is the bound draw buffer] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/buffers.c | 42 ++++++++++++++++++++++++----------------- src/mesa/main/buffers.h | 4 ++++ 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index a338013fed5..8b52a5392a8 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -242,16 +242,16 @@ read_buffer_enum_to_index(GLenum buffer) * * See the GL_EXT_framebuffer_object spec for more info. 
*/ -void GLAPIENTRY -_mesa_DrawBuffer(GLenum buffer) +void +_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller) { GLbitfield destMask; - GET_CURRENT_CONTEXT(ctx); FLUSH_VERTICES(ctx, 0); if (MESA_VERBOSE & VERBOSE_API) { - _mesa_debug(ctx, "glDrawBuffer %s\n", _mesa_lookup_enum_by_nr(buffer)); + _mesa_debug(ctx, "%s %s\n", caller, _mesa_lookup_enum_by_nr(buffer)); } if (buffer == GL_NONE) { @@ -259,33 +259,41 @@ _mesa_DrawBuffer(GLenum buffer) } else { const GLbitfield supportedMask - = supported_buffer_bitmask(ctx, ctx->DrawBuffer); + = supported_buffer_bitmask(ctx, fb); destMask = draw_buffer_enum_to_bitmask(ctx, buffer); if (destMask == BAD_MASK) { /* totally bogus buffer */ - _mesa_error(ctx, GL_INVALID_ENUM, - "glDrawBuffer(buffer=0x%x)", buffer); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", caller, + _mesa_lookup_enum_by_nr(buffer)); return; } destMask &= supportedMask; if (destMask == 0x0) { /* none of the named color buffers exist! */ - _mesa_error(ctx, GL_INVALID_OPERATION, - "glDrawBuffer(buffer=0x%x)", buffer); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffer)); return; } } /* if we get here, there's no error so set new state */ - _mesa_drawbuffers(ctx, ctx->DrawBuffer, 1, &buffer, &destMask); + _mesa_drawbuffers(ctx, fb, 1, &buffer, &destMask); - /* - * Call device driver function. 
- */ - if (ctx->Driver.DrawBuffers) - ctx->Driver.DrawBuffers(ctx, 1, &buffer); - else if (ctx->Driver.DrawBuffer) - ctx->Driver.DrawBuffer(ctx, buffer); + /* Call device driver function only if fb is the bound draw buffer */ + if (fb == ctx->DrawBuffer) { + if (ctx->Driver.DrawBuffers) + ctx->Driver.DrawBuffers(ctx, 1, &buffer); + else if (ctx->Driver.DrawBuffer) + ctx->Driver.DrawBuffer(ctx, buffer); + } +} + + +void GLAPIENTRY +_mesa_DrawBuffer(GLenum buffer) +{ + GET_CURRENT_CONTEXT(ctx); + _mesa_draw_buffer(ctx, ctx->DrawBuffer, buffer, "glDrawBuffer"); } diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index bc6d74a867e..4192afd3f13 100644 --- a/src/mesa/main/buffers.h +++ b/src/mesa/main/buffers.h @@ -38,6 +38,10 @@ struct gl_context; struct gl_framebuffer; +extern void +_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller); + extern void GLAPIENTRY _mesa_DrawBuffer( GLenum mode ); From 642fb71277b6e4c5d57ad7a7d6f2d5aae9b746ef Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Fri, 6 Feb 2015 14:44:43 -0800 Subject: [PATCH 098/834] main: Add entry point for NamedFramebufferDrawBuffer. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Fredrik: Fix the name of the buf parameter in the XML file] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 5 +++++ src/mesa/main/buffers.c | 19 +++++++++++++++++++ src/mesa/main/buffers.h | 3 +++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 28 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 3617e48a19c..32e3c0afbeb 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -187,6 +187,11 @@ + + + + + diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 8b52a5392a8..9b66e38b1e1 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -297,6 +297,25 @@ _mesa_DrawBuffer(GLenum buffer) } +void GLAPIENTRY +_mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferDrawBuffer"); + if (!fb) + return; + } + else + fb = ctx->WinSysDrawBuffer; + + _mesa_draw_buffer(ctx, fb, buf, "glNamedFramebufferDrawBuffer"); +} + + /** * Called by glDrawBuffersARB; specifies the destination color renderbuffers * for N fragment program color outputs. 
diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index 4192afd3f13..12d57430a39 100644 --- a/src/mesa/main/buffers.h +++ b/src/mesa/main/buffers.h @@ -45,6 +45,9 @@ _mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, extern void GLAPIENTRY _mesa_DrawBuffer( GLenum mode ); +extern void GLAPIENTRY +_mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf); + extern void GLAPIENTRY _mesa_DrawBuffers(GLsizei n, const GLenum *buffers); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 23ddabd169d..931eeceb212 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -985,6 +985,7 @@ const struct function gl_core_functions_possible[] = { { "glNamedFramebufferParameteri", 45, -1 }, { "glNamedFramebufferTexture", 45, -1 }, { "glNamedFramebufferTextureLayer", 45, -1 }, + { "glNamedFramebufferDrawBuffer", 45, -1 }, { "glInvalidateNamedFramebufferSubData", 45, -1 }, { "glInvalidateNamedFramebufferData", 45, -1 }, { "glClearNamedFramebufferiv", 45, -1 }, From 7518c6b5b2ae8b2a6e931d4be1d5976d553983a0 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Fri, 6 Feb 2015 15:23:05 -0800 Subject: [PATCH 099/834] main: Refactor _mesa_ReadBuffer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This could have added a new DD table entry for ReadBuffer that takes an arbitrary read buffer, but, after looking at the existing DD functions, Kenneth Graunke recommended that we just skip calling the DD functions in the case of ARB_direct_state_access. The DD implementations for ReadBuffer have limited functionality, especially with respect to ARB_direct_state_access. 
[Fredrik: Call the driver function when fb is the bound read buffer] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/buffers.c | 47 ++++++++++++++++++++++------------------- src/mesa/main/buffers.h | 7 +++++- src/mesa/main/context.c | 2 +- 3 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 9b66e38b1e1..e1d1bafc367 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -623,11 +623,10 @@ _mesa_update_draw_buffers(struct gl_context *ctx) * \param bufferIndex the numerical index corresponding to 'buffer' */ void -_mesa_readbuffer(struct gl_context *ctx, GLenum buffer, GLint bufferIndex) +_mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, GLint bufferIndex) { - struct gl_framebuffer *fb = ctx->ReadBuffer; - - if (_mesa_is_winsys_fbo(fb)) { + if ((fb == ctx->ReadBuffer) && _mesa_is_winsys_fbo(fb)) { /* Only update the per-context READ_BUFFER state if we're bound to * a window-system framebuffer. */ @@ -646,23 +645,17 @@ _mesa_readbuffer(struct gl_context *ctx, GLenum buffer, GLint bufferIndex) * Called by glReadBuffer to set the source renderbuffer for reading pixels. * \param mode color buffer such as GL_FRONT, GL_BACK, etc. */ -void GLAPIENTRY -_mesa_ReadBuffer(GLenum buffer) +void +_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller) { - struct gl_framebuffer *fb; GLbitfield supportedMask; GLint srcBuffer; - GET_CURRENT_CONTEXT(ctx); FLUSH_VERTICES(ctx, 0); if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glReadBuffer %s\n", _mesa_lookup_enum_by_nr(buffer)); - - fb = ctx->ReadBuffer; - - if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glReadBuffer %s\n", _mesa_lookup_enum_by_nr(buffer)); + _mesa_debug(ctx, "%s %s\n", caller, _mesa_lookup_enum_by_nr(buffer)); if (buffer == GL_NONE) { /* This is legal--it means that no buffer should be bound for reading. 
*/ @@ -673,24 +666,34 @@ _mesa_ReadBuffer(GLenum buffer) srcBuffer = read_buffer_enum_to_index(buffer); if (srcBuffer == -1) { _mesa_error(ctx, GL_INVALID_ENUM, - "glReadBuffer(buffer=0x%x)", buffer); + "%s(invalid buffer %s)", caller, + _mesa_lookup_enum_by_nr(buffer)); return; } supportedMask = supported_buffer_bitmask(ctx, fb); if (((1 << srcBuffer) & supportedMask) == 0) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glReadBuffer(buffer=0x%x)", buffer); + "%s(invalid buffer %s)", caller, + _mesa_lookup_enum_by_nr(buffer)); return; } } /* OK, all error checking has been completed now */ - _mesa_readbuffer(ctx, buffer, srcBuffer); + _mesa_readbuffer(ctx, fb, buffer, srcBuffer); - /* - * Call device driver function. - */ - if (ctx->Driver.ReadBuffer) - (*ctx->Driver.ReadBuffer)(ctx, buffer); + /* Call the device driver function only if fb is the bound read buffer */ + if (fb == ctx->ReadBuffer) { + if (ctx->Driver.ReadBuffer) + (*ctx->Driver.ReadBuffer)(ctx, buffer); + } +} + + +void GLAPIENTRY +_mesa_ReadBuffer(GLenum buffer) +{ + GET_CURRENT_CONTEXT(ctx); + _mesa_read_buffer(ctx, ctx->ReadBuffer, buffer, "glReadBuffer"); } diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index 12d57430a39..ca7ad19235b 100644 --- a/src/mesa/main/buffers.h +++ b/src/mesa/main/buffers.h @@ -57,12 +57,17 @@ _mesa_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb, const GLbitfield *destMask); extern void -_mesa_readbuffer(struct gl_context *ctx, GLenum buffer, GLint bufferIndex); +_mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, GLint bufferIndex); extern void _mesa_update_draw_buffers(struct gl_context *ctx); +extern void +_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller); + extern void GLAPIENTRY _mesa_ReadBuffer( GLenum mode ); diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 279716a83d5..544cc142fde 100644 --- a/src/mesa/main/context.c +++ 
b/src/mesa/main/context.c @@ -1579,7 +1579,7 @@ handle_first_current(struct gl_context *ctx) bufferIndex = BUFFER_FRONT_LEFT; } - _mesa_readbuffer(ctx, buffer, bufferIndex); + _mesa_readbuffer(ctx, ctx->ReadBuffer, buffer, bufferIndex); } } From 1f0a5f32d328e54483dd623ad09bd3f6b119f7a6 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Fri, 6 Feb 2015 15:36:52 -0800 Subject: [PATCH 100/834] main: Add entry point for NamedFramebufferReadBuffer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Fredrik: Fix the name of the buf parameter in the XML file] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 5 +++++ src/mesa/main/buffers.c | 19 +++++++++++++++++++ src/mesa/main/buffers.h | 3 +++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 28 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 32e3c0afbeb..6e5c4d30462 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -192,6 +192,11 @@ + + + + + diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index e1d1bafc367..d9979ad9ed5 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -697,3 +697,22 @@ _mesa_ReadBuffer(GLenum buffer) GET_CURRENT_CONTEXT(ctx); _mesa_read_buffer(ctx, ctx->ReadBuffer, buffer, "glReadBuffer"); } + + +void GLAPIENTRY +_mesa_NamedFramebufferReadBuffer(GLuint framebuffer, GLenum src) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferReadBuffer"); + if (!fb) + return; + } + else + fb = ctx->WinSysReadBuffer; + + _mesa_read_buffer(ctx, fb, src, "glNamedFramebufferReadBuffer"); +} diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index ca7ad19235b..52a23188194 100644 --- a/src/mesa/main/buffers.h +++ 
b/src/mesa/main/buffers.h @@ -71,5 +71,8 @@ _mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, extern void GLAPIENTRY _mesa_ReadBuffer( GLenum mode ); +extern void GLAPIENTRY +_mesa_NamedFramebufferReadBuffer(GLuint framebuffer, GLenum src); + #endif diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 931eeceb212..dcbb0c69e4c 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -986,6 +986,7 @@ const struct function gl_core_functions_possible[] = { { "glNamedFramebufferTexture", 45, -1 }, { "glNamedFramebufferTextureLayer", 45, -1 }, { "glNamedFramebufferDrawBuffer", 45, -1 }, + { "glNamedFramebufferReadBuffer", 45, -1 }, { "glInvalidateNamedFramebufferSubData", 45, -1 }, { "glInvalidateNamedFramebufferData", 45, -1 }, { "glClearNamedFramebufferiv", 45, -1 }, From 68c6964b376df7ef2248fab83e0f8abc6df56d47 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Mon, 9 Feb 2015 13:33:13 -0800 Subject: [PATCH 101/834] main: Refactor DrawBuffers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This could have added a new DD table entry for DrawBuffers that takes an arbitrary draw buffer, but, after looking at the existing DD functions, Kenneth Graunke recommended that we just skip calling the DD functions in the case of ARB_direct_state_access. The DD implementations for DrawBuffer(s) have limited functionality, especially with respect to ARB_direct_state_access. 
[Fredrik: Call the driver function when fb is the bound draw buffer] Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- src/mesa/main/buffers.c | 71 ++++++++++++++++++++++++++++------------- src/mesa/main/buffers.h | 4 +++ 2 files changed, 52 insertions(+), 23 deletions(-) diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index d9979ad9ed5..0c6d379309f 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -325,13 +325,13 @@ _mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf) * names cannot specify more than one buffer. For example, * GL_FRONT_AND_BACK is illegal. */ -void GLAPIENTRY -_mesa_DrawBuffers(GLsizei n, const GLenum *buffers) +void +_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, + GLsizei n, const GLenum *buffers, const char *caller) { GLuint output; GLbitfield usedBufferMask, supportedMask; GLbitfield destMask[MAX_DRAW_BUFFERS]; - GET_CURRENT_CONTEXT(ctx); FLUSH_VERTICES(ctx, 0); @@ -342,12 +342,18 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * "An INVALID_VALUE error is generated if n is greater than * MAX_DRAW_BUFFERS." */ - if (n < 0 || n > (GLsizei) ctx->Const.MaxDrawBuffers) { - _mesa_error(ctx, GL_INVALID_VALUE, "glDrawBuffersARB(n)"); + if (n < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", caller); return; } - supportedMask = supported_buffer_bitmask(ctx, ctx->DrawBuffer); + if (n > (GLsizei) ctx->Const.MaxDrawBuffers) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(n > maximum number of draw buffers)", caller); + return; + } + + supportedMask = supported_buffer_bitmask(ctx, fb); usedBufferMask = 0x0; /* From the ES 3.0 specification, page 180: @@ -355,9 +361,9 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * and the constant must be BACK or NONE." 
* (same restriction applies with GL_EXT_draw_buffers specification) */ - if (ctx->API == API_OPENGLES2 && _mesa_is_winsys_fbo(ctx->DrawBuffer) && + if (ctx->API == API_OPENGLES2 && _mesa_is_winsys_fbo(fb) && (n != 1 || (buffers[0] != GL_NONE && buffers[0] != GL_BACK))) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawBuffers(buffer)"); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid buffers)", caller); return; } @@ -389,9 +395,11 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * or equal to the value of MAX_COLOR_ATTACHMENTS, then the error * INVALID_OPERATION results." */ - if (_mesa_is_user_fbo(ctx->DrawBuffer) && buffers[output] >= + if (_mesa_is_user_fbo(fb) && buffers[output] >= GL_COLOR_ATTACHMENT0 + ctx->Const.MaxDrawBuffers) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawBuffersARB(buffer)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(buffers[%d] >= maximum number of draw buffers)", + caller, output); return; } @@ -402,9 +410,10 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * 4.5 or 4.6. Otherwise, an INVALID_ENUM error is generated. */ if (destMask[output] == BAD_MASK) { - _mesa_error(ctx, GL_INVALID_ENUM, "glDrawBuffersARB(buffer)"); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffers[output])); return; - } + } /* From the OpenGL 4.0 specification, page 256: * "For both the default framebuffer and framebuffer objects, the @@ -417,7 +426,8 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * but the Khronos conformance tests expect INVALID_ENUM. 
*/ if (_mesa_bitcount(destMask[output]) > 1) { - _mesa_error(ctx, GL_INVALID_ENUM, "glDrawBuffersARB(buffer)"); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffers[output])); return; } @@ -434,7 +444,8 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) destMask[output] &= supportedMask; if (destMask[output] == 0) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glDrawBuffersARB(unsupported buffer)"); + "%s(unsupported buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffers[output])); return; } @@ -443,10 +454,12 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * in bufs must be COLOR_ATTACHMENTi or NONE. [...] INVALID_OPERATION." * (same restriction applies with GL_EXT_draw_buffers specification) */ - if (ctx->API == API_OPENGLES2 && _mesa_is_user_fbo(ctx->DrawBuffer) && + if (ctx->API == API_OPENGLES2 && _mesa_is_user_fbo(fb) && buffers[output] != GL_NONE && buffers[output] != GL_COLOR_ATTACHMENT0 + output) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawBuffers(buffer)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(unsupported buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffers[output])); return; } @@ -457,7 +470,8 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) */ if (destMask[output] & usedBufferMask) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glDrawBuffersARB(duplicated buffer)"); + "%s(duplicated buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffers[output])); return; } @@ -467,17 +481,28 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) } /* OK, if we get here, there were no errors so set the new state */ - _mesa_drawbuffers(ctx, ctx->DrawBuffer, n, buffers, destMask); + _mesa_drawbuffers(ctx, fb, n, buffers, destMask); /* - * Call device driver function. Note that n can be equal to 0, + * Call device driver function if fb is the bound draw buffer. + * Note that n can be equal to 0, * in which case we don't want to reference buffers[0], which * may not be valid. 
*/ - if (ctx->Driver.DrawBuffers) - ctx->Driver.DrawBuffers(ctx, n, buffers); - else if (ctx->Driver.DrawBuffer) - ctx->Driver.DrawBuffer(ctx, n > 0 ? buffers[0] : GL_NONE); + if (fb == ctx->DrawBuffer) { + if (ctx->Driver.DrawBuffers) + ctx->Driver.DrawBuffers(ctx, n, buffers); + else if (ctx->Driver.DrawBuffer) + ctx->Driver.DrawBuffer(ctx, n > 0 ? buffers[0] : GL_NONE); + } +} + + +void GLAPIENTRY +_mesa_DrawBuffers(GLsizei n, const GLenum *buffers) +{ + GET_CURRENT_CONTEXT(ctx); + _mesa_draw_buffers(ctx, ctx->DrawBuffer, n, buffers, "glDrawBuffers"); } diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index 52a23188194..66871d70a09 100644 --- a/src/mesa/main/buffers.h +++ b/src/mesa/main/buffers.h @@ -48,6 +48,10 @@ _mesa_DrawBuffer( GLenum mode ); extern void GLAPIENTRY _mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf); +extern void +_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, + GLsizei n, const GLenum *buffers, const char *caller); + extern void GLAPIENTRY _mesa_DrawBuffers(GLsizei n, const GLenum *buffers); From 9de7a81626304dae5ced4e202a820e353f8fc1e9 Mon Sep 17 00:00:00 2001 From: Laura Ekstrand Date: Mon, 9 Feb 2015 14:08:00 -0800 Subject: [PATCH 102/834] main: Add entry point for NamedFramebufferDrawBuffers. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Fredrik Höglund Signed-off-by: Fredrik Höglund --- .../glapi/gen/ARB_direct_state_access.xml | 6 ++++++ src/mesa/main/buffers.c | 20 +++++++++++++++++++ src/mesa/main/buffers.h | 4 ++++ src/mesa/main/tests/dispatch_sanity.cpp | 1 + 4 files changed, 31 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 6e5c4d30462..bb9baf5a3d0 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -192,6 +192,12 @@ + + + + + + diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 0c6d379309f..0536266d756 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -506,6 +506,26 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) } +void GLAPIENTRY +_mesa_NamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n, + const GLenum *bufs) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferDrawBuffers"); + if (!fb) + return; + } + else + fb = ctx->WinSysDrawBuffer; + + _mesa_draw_buffers(ctx, fb, n, bufs, "glNamedFramebufferDrawBuffers"); +} + + /** * Performs necessary state updates when _mesa_drawbuffers makes an * actual change. 
diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index 66871d70a09..5aa79fda54b 100644 --- a/src/mesa/main/buffers.h +++ b/src/mesa/main/buffers.h @@ -55,6 +55,10 @@ _mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, extern void GLAPIENTRY _mesa_DrawBuffers(GLsizei n, const GLenum *buffers); +extern void GLAPIENTRY +_mesa_NamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n, + const GLenum *bufs); + extern void _mesa_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb, GLuint n, const GLenum *buffers, diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index dcbb0c69e4c..77dc1401d19 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -986,6 +986,7 @@ const struct function gl_core_functions_possible[] = { { "glNamedFramebufferTexture", 45, -1 }, { "glNamedFramebufferTextureLayer", 45, -1 }, { "glNamedFramebufferDrawBuffer", 45, -1 }, + { "glNamedFramebufferDrawBuffers", 45, -1 }, { "glNamedFramebufferReadBuffer", 45, -1 }, { "glInvalidateNamedFramebufferSubData", 45, -1 }, { "glInvalidateNamedFramebufferData", 45, -1 }, From 30dcaaec356cc117d7227c6680620cd50ff534e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 20:34:03 +0200 Subject: [PATCH 103/834] mesa: Add an extension flag for ARB_direct_state_access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/main/extensions.c | 2 +- src/mesa/main/mtypes.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index f7ce0642aef..c82416aa072 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -104,7 +104,7 @@ static const struct extension extension_table[] = { { "GL_ARB_depth_clamp", o(ARB_depth_clamp), GL, 2003 }, { "GL_ARB_depth_texture", 
o(ARB_depth_texture), GLL, 2001 }, { "GL_ARB_derivative_control", o(ARB_derivative_control), GL, 2014 }, - { "GL_ARB_direct_state_access", o(dummy_false), GL, 2014 }, + { "GL_ARB_direct_state_access", o(ARB_direct_state_access), GL, 2014 }, { "GL_ARB_draw_buffers", o(dummy_true), GL, 2002 }, { "GL_ARB_draw_buffers_blend", o(ARB_draw_buffers_blend), GL, 2009 }, { "GL_ARB_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), GL, 2009 }, diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 737f0be6d62..83425176a3f 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3621,6 +3621,7 @@ struct gl_extensions GLboolean ARB_depth_clamp; GLboolean ARB_depth_texture; GLboolean ARB_derivative_control; + GLboolean ARB_direct_state_access; GLboolean ARB_draw_buffers_blend; GLboolean ARB_draw_elements_base_vertex; GLboolean ARB_draw_indirect; From 03420eac0c53280beae5f72783e52950fd1e3fba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Wed, 29 Apr 2015 19:44:06 +0200 Subject: [PATCH 104/834] mesa: Make GL_TEXTURE_CUBE_MAP valid in FramebufferTextureLayer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/main/fbobject.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 971dc688a04..c2bc081cf1a 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2704,9 +2704,9 @@ static bool check_texture_target(struct gl_context *ctx, GLenum target, const char *caller) { - /* We're being called by glFramebufferTextureLayer() and - * textarget is not used. The only legal texture types for - * that function are 3D and 1D/2D arrays textures. + /* We're being called by glFramebufferTextureLayer(). + * The only legal texture types for that function are 3D, + * cube-map, and 1D/2D/cube-map array textures. 
*/ switch (target) { case GL_TEXTURE_3D: @@ -2715,6 +2715,11 @@ check_texture_target(struct gl_context *ctx, GLenum target, case GL_TEXTURE_CUBE_MAP_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return true; + case GL_TEXTURE_CUBE_MAP: + /* This target is valid in TextureLayer when ARB_direct_state_access + * or OpenGL 4.5 is supported. + */ + return ctx->Extensions.ARB_direct_state_access; } _mesa_error(ctx, GL_INVALID_OPERATION, @@ -2847,6 +2852,13 @@ check_layer(struct gl_context *ctx, GLenum target, GLint layer, return false; } } + else if (target == GL_TEXTURE_CUBE_MAP) { + if (layer >= 6) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(layer %u >= 6)", caller, layer); + return false; + } + } return true; } @@ -3035,6 +3047,7 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; struct gl_texture_object *texObj; + GLenum textarget = 0; const char *func = "glFramebufferTextureLayer"; @@ -3060,9 +3073,15 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, if (!check_level(ctx, texObj->Target, level, func)) return; + + if (texObj->Target == GL_TEXTURE_CUBE_MAP) { + assert(layer >= 0 && layer < 6); + textarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + layer; + layer = 0; + } } - _mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, textarget, level, layer, GL_FALSE, func); } @@ -3074,6 +3093,7 @@ _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; struct gl_texture_object *texObj; + GLenum textarget = 0; const char *func = "glNamedFramebufferTextureLayer"; @@ -3095,9 +3115,15 @@ _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment, if (!check_level(ctx, texObj->Target, level, func)) return; + + if (texObj->Target == GL_TEXTURE_CUBE_MAP) { + assert(layer >= 0 && layer < 6); + textarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + layer; + layer = 
0; + } } - _mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, textarget, level, layer, GL_FALSE, func); } From 7d212765a470972f4712e42caf6406b257220369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 20:44:07 +0200 Subject: [PATCH 105/834] mesa: Add ARB_direct_state_access checks in XFB functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/main/transformfeedback.c | 42 +++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index 103011ce572..642fa96477a 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -706,6 +706,13 @@ _mesa_TransformFeedbackBufferBase(GLuint xfb, GLuint index, GLuint buffer) struct gl_transform_feedback_object *obj; struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTransformFeedbackBufferBase(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + obj = lookup_transform_feedback_object_err(ctx, xfb, "glTransformFeedbackBufferBase"); if(!obj) { @@ -729,6 +736,13 @@ _mesa_TransformFeedbackBufferRange(GLuint xfb, GLuint index, GLuint buffer, struct gl_transform_feedback_object *obj; struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTransformFeedbackBufferRange(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + obj = lookup_transform_feedback_object_err(ctx, xfb, "glTransformFeedbackBufferRange"); if(!obj) { @@ -1045,6 +1059,13 @@ _mesa_CreateTransformFeedbacks(GLsizei n, GLuint *names) { GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + 
"glCreateTransformFeedbacks(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + create_transform_feedbacks(ctx, n, names, true); } @@ -1215,6 +1236,13 @@ _mesa_GetTransformFeedbackiv(GLuint xfb, GLenum pname, GLint *param) struct gl_transform_feedback_object *obj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetTransformFeedbackiv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + obj = lookup_transform_feedback_object_err(ctx, xfb, "glGetTransformFeedbackiv"); if(!obj) { @@ -1241,6 +1269,13 @@ _mesa_GetTransformFeedbacki_v(GLuint xfb, GLenum pname, GLuint index, struct gl_transform_feedback_object *obj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetTransformFeedbacki_v(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + obj = lookup_transform_feedback_object_err(ctx, xfb, "glGetTransformFeedbacki_v"); if(!obj) { @@ -1270,6 +1305,13 @@ _mesa_GetTransformFeedbacki64_v(GLuint xfb, GLenum pname, GLuint index, struct gl_transform_feedback_object *obj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetTransformFeedbacki64_v(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + obj = lookup_transform_feedback_object_err(ctx, xfb, "glGetTransformFeedbacki64_v"); if(!obj) { From 339ed0984d4f54fca91235a1df2ce3a850f6123f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 20:46:36 +0200 Subject: [PATCH 106/834] mesa: Add ARB_direct_state_access checks in buffer object functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/main/bufferobj.c | 105 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git 
a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 66dee680258..660bc94892e 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -1303,6 +1303,12 @@ create_buffers(GLsizei n, GLuint *buffers, bool dsa) const char *func = dsa ? "glCreateBuffers" : "glGenBuffers"; + if (dsa && !ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(GL_ARB_direct_state_access is not supported)", func); + return; + } + if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "%s(%d)\n", func, n); @@ -1477,6 +1483,13 @@ _mesa_NamedBufferStorage(GLuint buffer, GLsizeiptr size, const GLvoid *data, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedBufferStorage(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glNamedBufferStorage"); if (!bufObj) return; @@ -1603,6 +1616,13 @@ _mesa_NamedBufferData(GLuint buffer, GLsizeiptr size, const GLvoid *data, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedBufferData(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glNamedBufferData"); if (!bufObj) return; @@ -1673,6 +1693,13 @@ _mesa_NamedBufferSubData(GLuint buffer, GLintptr offset, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedBufferSubData(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glNamedBufferSubData"); if (!bufObj) return; @@ -1710,6 +1737,13 @@ _mesa_GetNamedBufferSubData(GLuint buffer, GLintptr offset, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + if 
(!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetNamedBufferSubData(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetNamedBufferSubData"); if (!bufObj) @@ -1805,6 +1839,13 @@ _mesa_ClearNamedBufferData(GLuint buffer, GLenum internalformat, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glClearNamedBufferData(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glClearNamedBufferData"); if (!bufObj) return; @@ -1842,6 +1883,13 @@ _mesa_ClearNamedBufferSubData(GLuint buffer, GLenum internalformat, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glClearNamedBufferSubData(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glClearNamedBufferSubData"); if (!bufObj) @@ -1930,6 +1978,13 @@ _mesa_UnmapNamedBuffer(GLuint buffer) GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glUnmapNamedBuffer(GL_ARB_direct_state_access " + "is not supported)"); + return GL_FALSE; + } + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glUnmapNamedBuffer"); if (!bufObj) return GL_FALSE; @@ -2039,6 +2094,13 @@ _mesa_GetNamedBufferParameteriv(GLuint buffer, GLenum pname, GLint *params) struct gl_buffer_object *bufObj; GLint64 parameter; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetNamedBufferParameteriv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetNamedBufferParameteriv"); if (!bufObj) @@ -2059,6 
+2121,13 @@ _mesa_GetNamedBufferParameteri64v(GLuint buffer, GLenum pname, struct gl_buffer_object *bufObj; GLint64 parameter; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetNamedBufferParameteri64v(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetNamedBufferParameteri64v"); if (!bufObj) @@ -2098,6 +2167,13 @@ _mesa_GetNamedBufferPointerv(GLuint buffer, GLenum pname, GLvoid **params) GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetNamedBufferPointerv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + if (pname != GL_BUFFER_MAP_POINTER) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetNamedBufferPointerv(pname != " "GL_BUFFER_MAP_POINTER)"); @@ -2212,6 +2288,13 @@ _mesa_CopyNamedBufferSubData(GLuint readBuffer, GLuint writeBuffer, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *src, *dst; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCopyNamedBufferSubData(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + src = _mesa_lookup_bufferobj_err(ctx, readBuffer, "glCopyNamedBufferSubData"); if (!src) @@ -2430,6 +2513,13 @@ _mesa_MapNamedBufferRange(GLuint buffer, GLintptr offset, GLsizeiptr length, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glMapNamedBufferRange(GL_ARB_direct_state_access " + "is not supported)"); + return NULL; + } + if (!ctx->Extensions.ARB_map_buffer_range) { _mesa_error(ctx, GL_INVALID_OPERATION, "glMapNamedBufferRange(" @@ -2497,6 +2587,13 @@ _mesa_MapNamedBuffer(GLuint buffer, GLenum access) struct gl_buffer_object *bufObj; GLbitfield accessFlags; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, 
GL_INVALID_OPERATION, + "glMapNamedBuffer(GL_ARB_direct_state_access " + "is not supported)"); + return NULL; + } + if (!get_map_buffer_access_flags(ctx, access, &accessFlags)) { _mesa_error(ctx, GL_INVALID_ENUM, "glMapNamedBuffer(invalid access)"); return NULL; @@ -2587,6 +2684,14 @@ _mesa_FlushMappedNamedBufferRange(GLuint buffer, GLintptr offset, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glFlushMappedNamedBufferRange(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glFlushMappedNamedBufferRange"); if (!bufObj) From 6ad0b7e07a0445e9e0f368e079c4f7b8a6757bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 20:48:11 +0200 Subject: [PATCH 107/834] mesa: Add ARB_direct_state_access checks in FBO functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/main/blit.c | 7 +++++ src/mesa/main/buffers.c | 21 +++++++++++++ src/mesa/main/clear.c | 32 +++++++++++++++++++ src/mesa/main/fbobject.c | 67 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+) diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c index db8fee5a414..fac97245082 100644 --- a/src/mesa/main/blit.c +++ b/src/mesa/main/blit.c @@ -540,6 +540,13 @@ _mesa_BlitNamedFramebuffer(GLuint readFramebuffer, GLuint drawFramebuffer, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *readFb, *drawFb; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glBlitNamedFramebuffer(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBlitNamedFramebuffer(%u %u %d, %d, %d, %d, " diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 
0536266d756..c83459addf7 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -303,6 +303,13 @@ _mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf) GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedFramebufferDrawBuffer(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + if (framebuffer) { fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, "glNamedFramebufferDrawBuffer"); @@ -513,6 +520,13 @@ _mesa_NamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedFramebufferDrawBuffers(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + if (framebuffer) { fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, "glNamedFramebufferDrawBuffers"); @@ -750,6 +764,13 @@ _mesa_NamedFramebufferReadBuffer(GLuint framebuffer, GLenum src) GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedFramebufferReadBuffer(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + if (framebuffer) { fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, "glNamedFramebufferReadBuffer"); diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c index 426caea4709..c6999f7fdb6 100644 --- a/src/mesa/main/clear.c +++ b/src/mesa/main/clear.c @@ -412,6 +412,14 @@ _mesa_ClearNamedFramebufferiv(GLuint framebuffer, GLenum buffer, { GLint oldfb; + GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glClearNamedFramebufferiv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); 
_mesa_ClearBufferiv(buffer, drawbuffer, value); @@ -502,6 +510,14 @@ _mesa_ClearNamedFramebufferuiv(GLuint framebuffer, GLenum buffer, { GLint oldfb; + GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glClearNamedFramebufferuiv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); _mesa_ClearBufferuiv(buffer, drawbuffer, value); @@ -613,6 +629,14 @@ _mesa_ClearNamedFramebufferfv(GLuint framebuffer, GLenum buffer, { GLint oldfb; + GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glClearNamedFramebufferfv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); _mesa_ClearBufferfv(buffer, drawbuffer, value); @@ -695,6 +719,14 @@ _mesa_ClearNamedFramebufferfi(GLuint framebuffer, GLenum buffer, { GLint oldfb; + GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glClearNamedFramebufferfi(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); _mesa_ClearBufferfi(buffer, 0, depth, stencil); diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index c2bc081cf1a..65e194cb749 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2454,6 +2454,12 @@ create_framebuffers(GLsizei n, GLuint *framebuffers, bool dsa) const char *func = dsa ? 
"glCreateFramebuffers" : "glGenFramebuffers"; + if (dsa && !ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(GL_ARB_direct_state_access is not supported)", func); + return; + } + if (n < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", func); return; @@ -2552,6 +2558,13 @@ _mesa_CheckNamedFramebufferStatus(GLuint framebuffer, GLenum target) struct gl_framebuffer *fb; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCheckNamedFramebufferStatus(GL_ARB_direct_state_access " + "is not supported)"); + return 0; + } + /* Validate the target (for conformance's sake) and grab a reference to the * default framebuffer in case framebuffer = 0. * Section 9.4 Framebuffer Completeness of the OpenGL 4.5 core spec @@ -3097,6 +3110,12 @@ _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment, const char *func = "glNamedFramebufferTextureLayer"; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(GL_ARB_direct_state_access is not supported)", func); + return; + } + /* Get the framebuffer object */ fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, func); if (!fb) @@ -3182,6 +3201,12 @@ _mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment, const char *func = "glNamedFramebufferTexture"; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(GL_ARB_direct_state_access is not supported)", func); + return; + } + if (!_mesa_has_geometry_shaders(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, "unsupported function (glNamedFramebufferTexture) called"); @@ -3307,6 +3332,13 @@ _mesa_NamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment, struct gl_renderbuffer *rb; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedFramebufferRenderbuffer(GL_ARB_direct_state_access " + 
"is not supported)"); + return; + } + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, "glNamedFramebufferRenderbuffer"); @@ -3639,6 +3671,13 @@ _mesa_GetNamedFramebufferAttachmentParameteriv(GLuint framebuffer, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *buffer; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetNamedFramebufferAttachmentParameteriv(" + "GL_ARB_direct_state_access is not supported)"); + return; + } + if (framebuffer) { buffer = _mesa_lookup_framebuffer_err(ctx, framebuffer, "glGetNamedFramebufferAttachmentParameteriv"); @@ -3671,6 +3710,13 @@ _mesa_NamedFramebufferParameteri(GLuint framebuffer, GLenum pname, (void) pname; (void) param; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedFramebufferParameteri(" + "GL_ARB_direct_state_access is not supported)"); + return; + } + _mesa_error(ctx, GL_INVALID_OPERATION, "glNamedFramebufferParameteri not supported " "(ARB_framebuffer_no_attachments not implemented)"); @@ -3687,6 +3733,13 @@ _mesa_GetNamedFramebufferParameteriv(GLuint framebuffer, GLenum pname, (void) pname; (void) param; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetNamedFramebufferParameteriv(" + "GL_ARB_direct_state_access is not supported)"); + return; + } + _mesa_error(ctx, GL_INVALID_OPERATION, "glGetNamedFramebufferParameteriv not supported " "(ARB_framebuffer_no_attachments not implemented)"); @@ -3855,6 +3908,13 @@ _mesa_InvalidateNamedFramebufferSubData(GLuint framebuffer, struct gl_framebuffer *fb; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glInvalidateNamedFramebufferSubData(" + "GL_ARB_direct_state_access is not supported)"); + return; + } + /* The OpenGL 4.5 core spec (02.02.2015) says (in Section 17.4 Whole * Framebuffer Operations, PDF page 522): "If framebuffer is zero, the * default draw 
framebuffer is affected." @@ -3916,6 +3976,13 @@ _mesa_InvalidateNamedFramebufferData(GLuint framebuffer, struct gl_framebuffer *fb; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glInvalidateNamedFramebufferData(" + "GL_ARB_direct_state_access is not supported)"); + return; + } + /* The OpenGL 4.5 core spec (02.02.2015) says (in Section 17.4 Whole * Framebuffer Operations, PDF page 522): "If framebuffer is zero, the * default draw framebuffer is affected." From cb49940766b581c6656473d89c221653c69fa0f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 20:49:15 +0200 Subject: [PATCH 108/834] mesa: Add ARB_direct_state_access checks in renderbuffer functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/main/fbobject.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 65e194cb749..8db651ca2a1 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1489,6 +1489,14 @@ void GLAPIENTRY _mesa_CreateRenderbuffers(GLsizei n, GLuint *renderbuffers) { GET_CURRENT_CONTEXT(ctx); + + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCreateRenderbuffers(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + create_render_buffers(ctx, n, renderbuffers, true); } @@ -1929,6 +1937,12 @@ renderbuffer_storage_named(GLuint renderbuffer, GLenum internalFormat, { GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(GL_ARB_direct_state_access is not supported)", func); + return; + } + if (MESA_VERBOSE & VERBOSE_API) { if (samples == NO_SAMPLES) _mesa_debug(ctx, "%s(%u, %s, %d, %d)\n", @@ -2183,6 +2197,13 @@ 
_mesa_GetNamedRenderbufferParameteriv(GLuint renderbuffer, GLenum pname, { GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetNamedRenderbufferParameteriv(" + "GL_ARB_direct_state_access is not supported)"); + return; + } + struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, renderbuffer); if (!rb || rb == &DummyRenderbuffer) { /* ID was reserved, but no real renderbuffer object made yet */ From 8940957238e8584ce27295791cee4cc3d6f7cf1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 20:52:49 +0200 Subject: [PATCH 109/834] mesa: Add ARB_direct_state_access checks in texture functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/main/genmipmap.c | 7 ++++ src/mesa/main/texgetimage.c | 14 +++++++ src/mesa/main/teximage.c | 74 ++++++++++++++++++++++++++++++++ src/mesa/main/texobj.c | 14 +++++++ src/mesa/main/texparam.c | 84 +++++++++++++++++++++++++++++++++++++ src/mesa/main/texstorage.c | 7 ++++ 6 files changed, 200 insertions(+) diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c index 9aef090194e..32b9460ad07 100644 --- a/src/mesa/main/genmipmap.c +++ b/src/mesa/main/genmipmap.c @@ -158,6 +158,13 @@ _mesa_GenerateTextureMipmap(GLuint texture) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGenerateTextureMipmap(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = _mesa_lookup_texture_err(ctx, texture, "glGenerateTextureMipmap"); if (!texObj) return; diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 92b4d6795c6..f582a7f78b0 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -1108,6 +1108,13 @@ _mesa_GetTextureImage(GLuint texture, GLint 
level, GLenum format, GLenum err; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetTextureImage(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + /* * This has been moved here because a format/type mismatch can cause a NULL * texImage object, which in turn causes the mismatch error to be @@ -1344,6 +1351,13 @@ _mesa_GetCompressedTextureImage(GLuint texture, GLint level, GLint image_stride; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetCompressedTextureImage(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = _mesa_lookup_texture_err(ctx, texture, "glGetCompressedTextureImage"); if (!texObj) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 7bc1da7f805..7616fd7cec9 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -3624,6 +3624,13 @@ texturesubimage(struct gl_context *ctx, GLuint dims, _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type), pixels); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureSubImage%uD(GL_ARB_direct_state_access " + "is not supported)", dims); + return; + } + /* Get the texture object by Name. 
*/ texObj = _mesa_lookup_texture(ctx, texture); if (!texObj) { @@ -4183,6 +4190,12 @@ _mesa_CopyTextureSubImage1D(GLuint texture, GLint level, const char *self = "glCopyTextureSubImage1D"; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(GL_ARB_direct_state_access is not supported)", self); + return; + } + texObj = _mesa_lookup_texture_err(ctx, texture, self); if (!texObj) return; @@ -4207,6 +4220,12 @@ _mesa_CopyTextureSubImage2D(GLuint texture, GLint level, const char *self = "glCopyTextureSubImage2D"; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(GL_ARB_direct_state_access is not supported)", self); + return; + } + texObj = _mesa_lookup_texture_err(ctx, texture, self); if (!texObj) return; @@ -4234,6 +4253,12 @@ _mesa_CopyTextureSubImage3D(GLuint texture, GLint level, const char *self = "glCopyTextureSubImage3D"; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(GL_ARB_direct_state_access is not supported)", self); + return; + } + texObj = _mesa_lookup_texture_err(ctx, texture, self); if (!texObj) return; @@ -4829,6 +4854,13 @@ _mesa_CompressedTextureSubImage1D(GLuint texture, GLint level, GLint xoffset, GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCompressedTextureSubImage1D(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = _mesa_lookup_texture_err(ctx, texture, "glCompressedTextureSubImage1D"); if (!texObj) @@ -4907,6 +4939,13 @@ _mesa_CompressedTextureSubImage2D(GLuint texture, GLint level, GLint xoffset, GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCompressedTextureSubImage2D(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = 
_mesa_lookup_texture_err(ctx, texture, "glCompressedTextureSubImage2D"); if (!texObj) @@ -4985,6 +5024,13 @@ _mesa_CompressedTextureSubImage3D(GLuint texture, GLint level, GLint xoffset, GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCompressedTextureSubImage3D(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = _mesa_lookup_texture_err(ctx, texture, "glCompressedTextureSubImage3D"); if (!texObj) @@ -5469,6 +5515,13 @@ _mesa_TextureBuffer(GLuint texture, GLenum internalFormat, GLuint buffer) GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureBuffer(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + if (buffer) { bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glTextureBuffer"); if (!bufObj) @@ -5497,6 +5550,13 @@ _mesa_TextureBufferRange(GLuint texture, GLenum internalFormat, GLuint buffer, GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureBufferRange(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + if (buffer) { bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glTextureBufferRange"); @@ -5801,6 +5861,13 @@ _mesa_TextureStorage2DMultisample(GLuint texture, GLsizei samples, struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureStorage2DMultisample(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = _mesa_lookup_texture_err(ctx, texture, "glTextureStorage2DMultisample"); if (!texObj) @@ -5821,6 +5888,13 @@ _mesa_TextureStorage3DMultisample(GLuint texture, GLsizei samples, struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + 
"glTextureStorage3DMultisample(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + /* Get the texture object by Name. */ texObj = _mesa_lookup_texture_err(ctx, texture, "glTextureStorage3DMultisample"); diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index c563f1e7434..d51e6954ba0 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -1317,6 +1317,13 @@ _mesa_CreateTextures(GLenum target, GLsizei n, GLuint *textures) GLint targetIndex; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCreateTextures(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + /* * The 4.5 core profile spec (30.10.2014) doesn't specify what * glCreateTextures should do with invalid targets, which was probably an @@ -1808,6 +1815,13 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture) _mesa_debug(ctx, "glBindTextureUnit %s %d\n", _mesa_lookup_enum_by_nr(GL_TEXTURE0+unit), (GLint) texture); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glBindTextureUnit(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + /* Section 8.1 (Texture Objects) of the OpenGL 4.5 core profile spec * (20141030) says: * "When texture is zero, each of the targets enumerated at the diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index d74134f41b1..1fa5830026a 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1108,6 +1108,13 @@ _mesa_TextureParameterfv(GLuint texture, GLenum pname, const GLfloat *params) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureParameterfv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. 
*/ @@ -1124,6 +1131,13 @@ _mesa_TextureParameterf(GLuint texture, GLenum pname, GLfloat param) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureParameterf(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. */ @@ -1140,6 +1154,13 @@ _mesa_TextureParameteri(GLuint texture, GLenum pname, GLint param) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureParameteri(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. */ @@ -1157,6 +1178,13 @@ _mesa_TextureParameteriv(GLuint texture, GLenum pname, struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureParameteriv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. */ @@ -1174,6 +1202,13 @@ _mesa_TextureParameterIiv(GLuint texture, GLenum pname, const GLint *params) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureParameterIiv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. 
*/ @@ -1191,6 +1226,13 @@ _mesa_TextureParameterIuiv(GLuint texture, GLenum pname, const GLuint *params) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureParameterIuiv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. */ @@ -1650,6 +1692,13 @@ _mesa_GetTextureLevelParameterfv(GLuint texture, GLint level, GLint iparam; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetTextureLevelParameterfv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = _mesa_lookup_texture_err(ctx, texture, "glGetTextureLevelParameterfv"); if (!texObj) @@ -1668,6 +1717,13 @@ _mesa_GetTextureLevelParameteriv(GLuint texture, GLint level, struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetTextureLevelParameteriv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = _mesa_lookup_texture_err(ctx, texture, "glGetTextureLevelParameteriv"); if (!texObj) @@ -2227,6 +2283,13 @@ _mesa_GetTextureParameterfv(GLuint texture, GLenum pname, GLfloat *params) struct gl_texture_object *obj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetTextureParameterfv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + obj = get_texobj_by_name(ctx, texture, GL_TRUE); if (!obj) { /* User passed a non-generated name. 
*/ @@ -2244,6 +2307,13 @@ _mesa_GetTextureParameteriv(GLuint texture, GLenum pname, GLint *params) struct gl_texture_object *obj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetTextureParameteriv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + obj = get_texobj_by_name(ctx, texture, GL_TRUE); if (!obj) { /* User passed a non-generated name. */ @@ -2261,6 +2331,13 @@ _mesa_GetTextureParameterIiv(GLuint texture, GLenum pname, GLint *params) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetTextureParameterIiv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = get_texobj_by_name(ctx, texture, GL_TRUE); if (!texObj) { /* User passed a non-generated name. */ @@ -2279,6 +2356,13 @@ _mesa_GetTextureParameterIuiv(GLuint texture, GLenum pname, GLuint *params) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetTextureParameterIuiv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + texObj = get_texobj_by_name(ctx, texture, GL_TRUE); if (!texObj) { /* User passed a non-generated name. */ diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c index 53cb2c091f8..dee74a825ea 100644 --- a/src/mesa/main/texstorage.c +++ b/src/mesa/main/texstorage.c @@ -507,6 +507,13 @@ texturestorage(GLuint dims, GLuint texture, GLsizei levels, _mesa_lookup_enum_by_nr(internalformat), width, height, depth); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureStorage%uD(GL_ARB_direct_state_access " + "is not supported)", dims); + return; + } + /* Check the format to make sure it is sized. 
*/ if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) { _mesa_error(ctx, GL_INVALID_ENUM, From 36b05793372b86b914d9b95d0188f5f387e01d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 20:54:18 +0200 Subject: [PATCH 110/834] mesa: Add ARB_direct_state_access checks in VAO functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/main/arrayobj.c | 22 ++++++++++++++ src/mesa/main/varray.c | 64 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index 7c4004043de..320f435ea94 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -617,6 +617,14 @@ void GLAPIENTRY _mesa_CreateVertexArrays(GLsizei n, GLuint *arrays) { GET_CURRENT_CONTEXT(ctx); + + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCreateVertexArrays(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + gen_vertex_arrays(ctx, n, arrays, true, "glCreateVertexArrays"); } @@ -659,6 +667,13 @@ _mesa_VertexArrayElementBuffer(GLuint vaobj, GLuint buffer) struct gl_vertex_array_object *vao; struct gl_buffer_object *bufObj; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glVertexArrayElementBuffer(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + ASSERT_OUTSIDE_BEGIN_END(ctx); /* The GL_ARB_direct_state_access specification says: @@ -695,6 +710,13 @@ _mesa_GetVertexArrayiv(GLuint vaobj, GLenum pname, GLint *param) ASSERT_OUTSIDE_BEGIN_END(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetVertexArrayiv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + /* The GL_ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated if is not 
diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index 7389037ae85..da6bbce52aa 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -777,6 +777,13 @@ _mesa_EnableVertexArrayAttrib(GLuint vaobj, GLuint index) GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glEnableVertexArrayAttrib(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by EnableVertexArrayAttrib @@ -830,6 +837,13 @@ _mesa_DisableVertexArrayAttrib(GLuint vaobj, GLuint index) GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glDisableVertexArrayAttrib(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by EnableVertexArrayAttrib @@ -1094,6 +1108,13 @@ _mesa_GetVertexArrayIndexediv(GLuint vaobj, GLuint index, GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetVertexArrayIndexediv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated if is not @@ -1157,6 +1178,14 @@ _mesa_GetVertexArrayIndexed64iv(GLuint vaobj, GLuint index, GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetVertexArrayIndexed64iv(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + + /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated if is not @@ -1745,6 +1774,13 @@ 
_mesa_VertexArrayVertexBuffer(GLuint vaobj, GLuint bindingIndex, GLuint buffer, GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glVertexArrayVertexBuffer(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by VertexArrayVertexBuffer @@ -1910,6 +1946,14 @@ _mesa_VertexArrayVertexBuffers(GLuint vaobj, GLuint first, GLsizei count, GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glVertexArrayVertexBuffers(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + + /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by VertexArrayVertexBuffer @@ -2018,6 +2062,12 @@ vertex_array_attrib_format(GLuint vaobj, GLuint attribIndex, GLint size, GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(GL_ARB_direct_state_access is not supported)", func); + return; + } + ASSERT_OUTSIDE_BEGIN_END(ctx); /* The ARB_direct_state_access spec says: @@ -2155,6 +2205,13 @@ _mesa_VertexArrayAttribBinding(GLuint vaobj, GLuint attribIndex, GLuint bindingI GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glVertexArrayAttribBinding(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by VertexArrayAttribBinding @@ -2229,6 +2286,13 @@ _mesa_VertexArrayBindingDivisor(GLuint vaobj, GLuint bindingIndex, GLuint diviso struct gl_vertex_array_object *vao; GET_CURRENT_CONTEXT(ctx); + if 
(!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glVertexArrayBindingDivisor(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by VertexArrayBindingDivisor From 9e7149c8986348bf9567f049444783ef52775f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 20:55:31 +0200 Subject: [PATCH 111/834] mesa: Add ARB_direct_state_access checks in sampler object functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/main/samplerobj.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c index a3aacc66aa3..60711a5b5e3 100644 --- a/src/mesa/main/samplerobj.c +++ b/src/mesa/main/samplerobj.c @@ -221,6 +221,13 @@ void GLAPIENTRY _mesa_CreateSamplers(GLsizei count, GLuint *samplers) { GET_CURRENT_CONTEXT(ctx); + + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glCreateSamplers(" + "GL_ARB_direct_state_access is not supported)"); + return; + } + create_samplers(ctx, count, samplers, "glCreateSamplers"); } From bebf3c6ab314bde05ac5a3b4d3e63fd36243c58e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 20:56:18 +0200 Subject: [PATCH 112/834] mesa: Add ARB_direct_state_access checks in program pipeline functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/main/pipelineobj.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index 0fefa7d568b..a33cdd139c8 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -553,6 +553,12 @@ 
_mesa_CreateProgramPipelines(GLsizei n, GLuint *pipelines) { GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glCreateProgramPipelines(" + "GL_ARB_direct_state_access is not supported)"); + return; + } + create_program_pipelines(ctx, n, pipelines, true); } From d3368e0c9e27ced6059eb2ecdf2aa999a00e90b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 20:57:29 +0200 Subject: [PATCH 113/834] mesa: Add ARB_direct_state_access checks in query object functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/main/queryobj.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c index 5ff1b953231..2784b4c0532 100644 --- a/src/mesa/main/queryobj.c +++ b/src/mesa/main/queryobj.c @@ -284,6 +284,13 @@ _mesa_CreateQueries(GLenum target, GLsizei n, GLuint *ids) { GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.ARB_direct_state_access) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCreateQueries(GL_ARB_direct_state_access " + "is not supported)"); + return; + } + switch (target) { case GL_SAMPLES_PASSED: case GL_ANY_SAMPLES_PASSED: From 121030eed8fc41789d2f4f7517bbc0dd6199667b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 21:04:22 +0200 Subject: [PATCH 114/834] i915: Enable ARB_direct_state_access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This extension requires OpenGL 2.0, so enable it on gen3 and later. 
Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/drivers/dri/i915/intel_extensions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i915/intel_extensions.c b/src/mesa/drivers/dri/i915/intel_extensions.c index ab7820f1232..590c6efcccd 100644 --- a/src/mesa/drivers/dri/i915/intel_extensions.c +++ b/src/mesa/drivers/dri/i915/intel_extensions.c @@ -83,6 +83,7 @@ intelInitExtensions(struct gl_context *ctx) if (intel->gen >= 3) { ctx->Extensions.ARB_ES2_compatibility = true; ctx->Extensions.ARB_depth_texture = true; + ctx->Extensions.ARB_direct_state_access = true; ctx->Extensions.ARB_fragment_program = true; ctx->Extensions.ARB_shadow = true; ctx->Extensions.ARB_texture_non_power_of_two = true; From a57feba0a35de35728269aeb26b039e4f2393d69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 21:05:11 +0200 Subject: [PATCH 115/834] i965: Enable ARB_direct_state_access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/drivers/dri/i965/intel_extensions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index cafb77455d7..18b69a0fd8b 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -183,6 +183,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_depth_buffer_float = true; ctx->Extensions.ARB_depth_clamp = true; ctx->Extensions.ARB_depth_texture = true; + ctx->Extensions.ARB_direct_state_access = true; ctx->Extensions.ARB_draw_elements_base_vertex = true; ctx->Extensions.ARB_draw_instanced = true; ctx->Extensions.ARB_ES2_compatibility = true; From 357bf80caade9e0be20dcc88ec38884e34abc986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sun, 10 May 2015 21:06:52 +0200 Subject: [PATCH 116/834] 
st/mesa: Enable ARB_direct_state_access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assume that all drivers that advertise support for NPOT textures are able to support GL 2.0. v2: Add a comment. Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- src/mesa/state_tracker/st_extensions.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index b1057f3eadd..23a45883d9a 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -650,6 +650,12 @@ void st_init_extensions(struct pipe_screen *screen, ARRAY_SIZE(vertex_mapping), PIPE_BUFFER, PIPE_BIND_VERTEX_BUFFER); + /* ARB_direct_state_access requires OpenGL 2.0. Assume that all drivers + * that support NPOT textures are able to support GL 2.0. + */ + if (extensions->ARB_texture_non_power_of_two) + extensions->ARB_direct_state_access = GL_TRUE; + if (extensions->ARB_stencil_texturing) extensions->ARB_texture_stencil8 = GL_TRUE; From d9109cc2111a765f09fbf54072b77528e35c156d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Thu, 7 May 2015 20:29:21 +0200 Subject: [PATCH 117/834] docs: Update the ARB_direct_state_access status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- docs/GL3.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 5a15bc55f47..c7009308aba 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -192,10 +192,10 @@ GL 4.5, GLSL 4.50: GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, llvmpipe, softpipe) GL_ARB_cull_distance not started GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600) - GL_ARB_direct_state_access started + GL_ARB_direct_state_access DONE (all drivers) - Transform Feedback object DONE - Buffer object DONE - - Framebuffer object 
started (Laura Ekstrand) + - Framebuffer object DONE - Renderbuffer object DONE - Texture object DONE - Vertex array object DONE From b9cb7c19806e8ec33a626c289788876499cd8a27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Thu, 7 May 2015 20:29:46 +0200 Subject: [PATCH 118/834] docs/relnotes: Mark off ARB_direct_state_access for 10.6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Make it clear that ARB_direct_state_access is only available on drivers that support GL 2.0+ Signed-off-by: Fredrik Höglund Reviewed-by: Adam Jackson --- docs/relnotes/10.6.0.html | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 6d379868865..474a2c71fd0 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -48,6 +48,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_clip_control on i965
  • GL_ARB_depth_buffer_float on freedreno
  • GL_ARB_depth_clamp on freedreno
  • +
  • GL_ARB_direct_state_access on all drivers that support GL 2.0+
  • GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600
  • GL_ARB_draw_instanced on freedreno
  • GL_ARB_gpu_shader_fp64 on nvc0, softpipe
  • From 48c84a36dd04774489ab655a583ecb9266476856 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 11 May 2015 21:36:16 +0200 Subject: [PATCH 119/834] nvc0: do not expose MP counters for nvf0 (GK110+) This fixes a crash when trying to monitor MP counters because compute support is not implemented for nvf0. Reported-by: Ilia Mirkin Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 83 +++++++++++-------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index 52032eb6f83..74f210cbf47 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -1407,11 +1407,14 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, count += NVC0_QUERY_DRV_STAT_COUNT; if (screen->base.device->drm_version >= 0x01000101) { - if (screen->base.class_3d >= NVE4_3D_CLASS) { - count += NVE4_PM_QUERY_COUNT; - } else if (screen->compute) { - count += NVC0_PM_QUERY_COUNT; /* NVC0_COMPUTE is not always enabled */ + if (screen->base.class_3d == NVE4_3D_CLASS) { + count += NVE4_PM_QUERY_COUNT; + } else + if (screen->base.class_3d < NVE4_3D_CLASS) { + /* NVC0_COMPUTE is not always enabled */ + count += NVC0_PM_QUERY_COUNT; + } } } @@ -1437,19 +1440,21 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, } else #endif if (id < count) { - if (screen->base.class_3d >= NVE4_3D_CLASS) { - info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; - info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); - info->max_value.u64 = - (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 
0 : 100; - info->group_id = NVC0_QUERY_MP_COUNTER_GROUP; - return 1; - } else if (screen->compute) { - info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; - info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); - info->group_id = NVC0_QUERY_MP_COUNTER_GROUP; - return 1; + if (screen->base.class_3d == NVE4_3D_CLASS) { + info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; + info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); + info->max_value.u64 = + (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100; + info->group_id = NVC0_QUERY_MP_COUNTER_GROUP; + return 1; + } else + if (screen->base.class_3d < NVE4_3D_CLASS) { + info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; + info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); + info->group_id = NVC0_QUERY_MP_COUNTER_GROUP; + return 1; + } } } /* user asked for info about non-existing query */ @@ -1469,10 +1474,13 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, #endif if (screen->base.device->drm_version >= 0x01000101) { - if (screen->base.class_3d >= NVE4_3D_CLASS) { - count++; - } else if (screen->compute) { - count++; /* NVC0_COMPUTE is not always enabled */ + if (screen->compute) { + if (screen->base.class_3d == NVE4_3D_CLASS) { + count++; + } else + if (screen->base.class_3d < NVE4_3D_CLASS) { + count++; /* NVC0_COMPUTE is not always enabled */ + } } } @@ -1480,25 +1488,28 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, return count; if (id == NVC0_QUERY_MP_COUNTER_GROUP) { - info->name = "MP counters"; - info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; + if (screen->compute) { + info->name = "MP counters"; + info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; - if (screen->base.class_3d >= NVE4_3D_CLASS) { - info->num_queries = NVE4_PM_QUERY_COUNT; + if (screen->base.class_3d == NVE4_3D_CLASS) { + info->num_queries = NVE4_PM_QUERY_COUNT; - /* On NVE4+, each multiprocessor have 8 hardware counters 
separated - * in two distinct domains, but we allow only one active query - * simultaneously because some of them use more than one hardware - * counter and this will result in an undefined behaviour. */ - info->max_active_queries = 1; /* TODO: handle multiple hw counters */ - return 1; - } else if (screen->compute) { - info->num_queries = NVC0_PM_QUERY_COUNT; + /* On NVE4+, each multiprocessor have 8 hardware counters separated + * in two distinct domains, but we allow only one active query + * simultaneously because some of them use more than one hardware + * counter and this will result in an undefined behaviour. */ + info->max_active_queries = 1; /* TODO: handle multiple hw counters */ + return 1; + } else + if (screen->base.class_3d < NVE4_3D_CLASS) { + info->num_queries = NVC0_PM_QUERY_COUNT; - /* On NVC0:NVE4, each multiprocessor have 8 hardware counters - * in a single domain. */ - info->max_active_queries = 8; - return 1; + /* On NVC0:NVE4, each multiprocessor have 8 hardware counters + * in a single domain. */ + info->max_active_queries = 8; + return 1; + } } } #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS From 7469f2fd2305fac1444dbbef5752958afd53e66d Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 12 May 2015 17:35:17 +0200 Subject: [PATCH 120/834] nv30: remove unused nvfx_fp_memcpy() function and comment nv40_fp_bra() The nv40_fp_bra() function in the same file is also unused but this is the only place where the nv30/nv40 isa is documented. 
Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- .../drivers/nouveau/nv30/nvfx_fragprog.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c index bbdca8102f0..9889c4e5f40 100644 --- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c +++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c @@ -327,6 +327,8 @@ nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target) //util_dynarray_append(&fpc->loop_stack, unsigned, target); } +#if 0 +/* documentation only */ /* warning: this only works forward, and probably only if not inside any IF */ static void nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target) @@ -352,6 +354,7 @@ nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target) reloc.location = fpc->inst_offset + 3; util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); } +#endif static void nv40_fp_brk(struct nvfx_fpc *fpc) @@ -1201,17 +1204,3 @@ out_err: tgsi_dump(fp->pipe.tokens, 0); goto out; } - -static inline void -nvfx_fp_memcpy(void* dst, const void* src, size_t len) -{ -#ifndef PIPE_ARCH_BIG_ENDIAN - memcpy(dst, src, len); -#else - size_t i; - for(i = 0; i < len; i += 4) { - uint32_t v = *(uint32_t*)((char*)src + i); - *(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16); - } -#endif -} From 70651b7041c9d90f4fb6c693c4ebb643a50dd9d0 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 12 May 2015 17:13:13 +0200 Subject: [PATCH 121/834] nv50/ir: remove unused private field cycle to SchedDataCalculator Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index d9aed34a0ce..be6fe9574df 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -2712,7 +2712,6 @@ private: RegScores *score; // for current BB std::vector scoreBoards; - int cycle; int prevData; operation prevOp; From ac1ac94b38d051b2413ea8f58b16891f1a55757d Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 12 May 2015 17:13:14 +0200 Subject: [PATCH 122/834] nv50/ir: silence compiler warnings about mismatched tags These warnings have been detected by Clang 3.6. codegen/nv50_ir_from_tgsi.cpp:1319:10: warning: struct 'Source' was previously declared as a class [-Wmismatched-tags] const struct tgsi::Source *code; Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 254629f907a..6f7f397609b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1316,7 +1316,7 @@ private: }; private: - const struct tgsi::Source *code; + const tgsi::Source *code; const struct nv50_ir_prog_info *info; struct { @@ -1955,13 +1955,13 @@ isResourceSpecial(const int r) } static inline bool -isResourceRaw(const struct tgsi::Source *code, const int r) +isResourceRaw(const tgsi::Source *code, const int r) { return isResourceSpecial(r) || code->resources[r].raw; } static inline nv50_ir::TexTarget -getResourceTarget(const struct tgsi::Source *code, int r) +getResourceTarget(const tgsi::Source *code, int r) { if (isResourceSpecial(r)) return nv50_ir::TEX_TARGET_BUFFER; From 175cbb447ae85b93b8b0244f345064763131481f Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 12 May 2015 17:13:15 +0200 Subject: [PATCH 123/834] nvc0: remove unused nv50_tsc_wrap_mode() function Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- 
src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index dca06f4cddb..63c3c52a5b2 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -413,24 +413,6 @@ nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso) #define NV50_TSC_WRAP_CASE(n) \ case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n -static INLINE unsigned -nv50_tsc_wrap_mode(unsigned wrap) -{ - switch (wrap) { - NV50_TSC_WRAP_CASE(REPEAT); - NV50_TSC_WRAP_CASE(MIRROR_REPEAT); - NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE); - NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER); - NV50_TSC_WRAP_CASE(CLAMP); - NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE); - NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER); - NV50_TSC_WRAP_CASE(MIRROR_CLAMP); - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - return NV50_TSC_WRAP_REPEAT; - } -} - static void nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { From 8362068c1b28eb2d3e334abc980c4711957d61fd Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Wed, 13 May 2015 17:13:37 -0500 Subject: [PATCH 124/834] egl: Add needed extern "C" for C++ access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Haiku's egl driver is C++ due to the interface natively being C++ Reviewed-⁠by: Brian Paul --- src/egl/main/eglapi.h | 10 ++++++++++ src/egl/main/eglarray.h | 8 ++++++++ src/egl/main/eglcompiler.h | 8 ++++++++ src/egl/main/eglconfig.h | 8 ++++++++ src/egl/main/eglcontext.h | 8 ++++++++ src/egl/main/eglcurrent.h | 8 ++++++++ src/egl/main/egldefines.h | 6 ++++++ src/egl/main/egldisplay.h | 9 +++++++++ src/egl/main/egldriver.h | 10 ++++++++++ src/egl/main/eglimage.h | 9 +++++++++ src/egl/main/egllog.h | 8 ++++++++ src/egl/main/eglsurface.h | 8 ++++++++ src/egl/main/egltypedefs.h | 10 ++++++++++ src/loader/loader.h | 9 
+++++++++ 14 files changed, 119 insertions(+) diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h index 066a416b3e3..e07970f8b1c 100644 --- a/src/egl/main/eglapi.h +++ b/src/egl/main/eglapi.h @@ -31,6 +31,11 @@ #ifndef EGLAPI_INCLUDED #define EGLAPI_INCLUDED + +#ifdef __cplusplus +extern "C" { +#endif + /** * A generic function ptr type */ @@ -209,4 +214,9 @@ struct _egl_api #endif }; + +#ifdef __cplusplus +} +#endif + #endif /* EGLAPI_INCLUDED */ diff --git a/src/egl/main/eglarray.h b/src/egl/main/eglarray.h index 228f6c3d2ca..29b7128b68d 100644 --- a/src/egl/main/eglarray.h +++ b/src/egl/main/eglarray.h @@ -34,6 +34,10 @@ #include "egltypedefs.h" +#ifdef __cplusplus +extern "C" { +#endif + typedef EGLBoolean (*_EGLArrayForEach)(void *elem, void *foreach_data); @@ -83,4 +87,8 @@ _eglGetArraySize(_EGLArray *array) } +#ifdef __cplusplus +} +#endif + #endif /* EGLARRAY_INCLUDED */ diff --git a/src/egl/main/eglcompiler.h b/src/egl/main/eglcompiler.h index b457a40a32a..9804ca4f281 100644 --- a/src/egl/main/eglcompiler.h +++ b/src/egl/main/eglcompiler.h @@ -30,9 +30,17 @@ #ifndef EGLCOMPILER_INCLUDED #define EGLCOMPILER_INCLUDED +#ifdef __cplusplus +extern "C" { +#endif + #define STATIC_ASSERT(COND) \ do { \ (void) sizeof(char [1 - 2*!(COND)]); \ } while (0) +#ifdef __cplusplus +} +#endif + #endif /* EGLCOMPILER_INCLUDED */ diff --git a/src/egl/main/eglconfig.h b/src/egl/main/eglconfig.h index dc59ea3f72f..84cb2276b70 100644 --- a/src/egl/main/eglconfig.h +++ b/src/egl/main/eglconfig.h @@ -39,6 +39,10 @@ #include "egltypedefs.h" +#ifdef __cplusplus +extern "C" { +#endif + /* update _eglValidationTable and _eglOffsetOfConfig before updating this * struct */ struct _egl_config @@ -225,4 +229,8 @@ extern EGLBoolean _eglGetConfigs(_EGLDriver *drv, _EGLDisplay *dpy, EGLConfig *configs, EGLint config_size, EGLint *num_config); +#ifdef __cplusplus +} +#endif + #endif /* EGLCONFIG_INCLUDED */ diff --git a/src/egl/main/eglcontext.h b/src/egl/main/eglcontext.h index 
241917f3bea..69bf77d8aff 100644 --- a/src/egl/main/eglcontext.h +++ b/src/egl/main/eglcontext.h @@ -37,6 +37,10 @@ #include "egldisplay.h" +#ifdef __cplusplus +extern "C" { +#endif + /** * "Base" class for device driver contexts. */ @@ -150,4 +154,8 @@ _eglGetContextHandle(_EGLContext *ctx) } +#ifdef __cplusplus +} +#endif + #endif /* EGLCONTEXT_INCLUDED */ diff --git a/src/egl/main/eglcurrent.h b/src/egl/main/eglcurrent.h index 3343755c985..1e386acdafb 100644 --- a/src/egl/main/eglcurrent.h +++ b/src/egl/main/eglcurrent.h @@ -34,6 +34,10 @@ #include "egltypedefs.h" +#ifdef __cplusplus +extern "C" { +#endif + #define _EGL_API_ALL_BITS \ (EGL_OPENGL_ES_BIT | \ EGL_OPENVG_BIT | \ @@ -115,4 +119,8 @@ extern EGLBoolean _eglError(EGLint errCode, const char *msg); +#ifdef __cplusplus +} +#endif + #endif /* EGLCURRENT_INCLUDED */ diff --git a/src/egl/main/egldefines.h b/src/egl/main/egldefines.h index 4c9e014fcea..a32cab26408 100644 --- a/src/egl/main/egldefines.h +++ b/src/egl/main/egldefines.h @@ -34,6 +34,9 @@ #ifndef EGLDEFINES_INCLUDED #define EGLDEFINES_INCLUDED +#ifdef __cplusplus +extern "C" { +#endif #define _EGL_MAX_EXTENSIONS_LEN 1000 @@ -41,5 +44,8 @@ #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#ifdef __cplusplus +} +#endif #endif /* EGLDEFINES_INCLUDED */ diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 36f50b97cb3..6862e2f51b7 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -39,6 +39,10 @@ #include "eglarray.h" +#ifdef __cplusplus +extern "C" { +#endif + enum _egl_platform_type { _EGL_PLATFORM_WINDOWS, _EGL_PLATFORM_X11, @@ -270,4 +274,9 @@ _eglGetWaylandDisplay(struct wl_display *native_display, const EGLint *attrib_list); #endif + +#ifdef __cplusplus +} +#endif + #endif /* EGLDISPLAY_INCLUDED */ diff --git a/src/egl/main/egldriver.h b/src/egl/main/egldriver.h index 11300ce1ee2..cd95cd305f8 100644 --- a/src/egl/main/egldriver.h +++ b/src/egl/main/egldriver.h @@ -38,6 +38,11 @@ #include "eglapi.h" 
#include + +#ifdef __cplusplus +extern "C" { +#endif + /** * Define an inline driver typecast function. * @@ -124,4 +129,9 @@ _eglSearchPathForEach(EGLBoolean (*callback)(const char *, size_t, void *), void *callback_data); +#ifdef __cplusplus +} +#endif + + #endif /* EGLDRIVER_INCLUDED */ diff --git a/src/egl/main/eglimage.h b/src/egl/main/eglimage.h index 50a87a18890..d06f91cd1a4 100644 --- a/src/egl/main/eglimage.h +++ b/src/egl/main/eglimage.h @@ -35,6 +35,11 @@ #include "egltypedefs.h" #include "egldisplay.h" + +#ifdef __cplusplus +extern "C" { +#endif + struct _egl_image_attrib_int { EGLint Value; @@ -161,4 +166,8 @@ _eglGetImageHandle(_EGLImage *img) } +#ifdef __cplusplus +} +#endif + #endif /* EGLIMAGE_INCLUDED */ diff --git a/src/egl/main/egllog.h b/src/egl/main/egllog.h index 12a477ee054..cf58525005e 100644 --- a/src/egl/main/egllog.h +++ b/src/egl/main/egllog.h @@ -34,6 +34,10 @@ #include "egltypedefs.h" +#ifdef __cplusplus +extern "C" { +#endif + #define _EGL_FATAL 0 /* unrecoverable error */ #define _EGL_WARNING 1 /* recoverable error/problem */ #define _EGL_INFO 2 /* just useful info */ @@ -55,4 +59,8 @@ extern void _eglLog(EGLint level, const char *fmtStr, ...); +#ifdef __cplusplus +} +#endif + #endif /* EGLLOG_INCLUDED */ diff --git a/src/egl/main/eglsurface.h b/src/egl/main/eglsurface.h index 438e27cebc8..74c429a9628 100644 --- a/src/egl/main/eglsurface.h +++ b/src/egl/main/eglsurface.h @@ -37,6 +37,10 @@ #include "egldisplay.h" +#ifdef __cplusplus +extern "C" { +#endif + /** * "Base" class for device driver surfaces. 
*/ @@ -174,4 +178,8 @@ _eglGetSurfaceHandle(_EGLSurface *surf) } +#ifdef __cplusplus +} +#endif + #endif /* EGLSURFACE_INCLUDED */ diff --git a/src/egl/main/egltypedefs.h b/src/egl/main/egltypedefs.h index e90959affae..82064a71fc0 100644 --- a/src/egl/main/egltypedefs.h +++ b/src/egl/main/egltypedefs.h @@ -38,6 +38,11 @@ #include "eglcompiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + typedef struct _egl_api _EGLAPI; typedef struct _egl_array _EGLArray; @@ -68,4 +73,9 @@ typedef struct _egl_sync _EGLSync; typedef struct _egl_thread_info _EGLThreadInfo; + +#ifdef __cplusplus +} +#endif + #endif /* EGLTYPEDEFS_INCLUDED */ diff --git a/src/loader/loader.h b/src/loader/loader.h index 810e7da7f9f..60c58f2f8fc 100644 --- a/src/loader/loader.h +++ b/src/loader/loader.h @@ -27,6 +27,10 @@ #ifndef LOADER_H #define LOADER_H +#ifdef __cplusplus +extern "C" { +#endif + /* Helpers to figure out driver and device name, eg. from pci-id, etc. */ #define _LOADER_DRI (1 << 0) @@ -61,4 +65,9 @@ loader_get_user_preferred_fd(int default_fd, int *different_device); void loader_set_logger(void (*logger)(int level, const char *fmt, ...)); + +#ifdef __cplusplus +} +#endif + #endif /* LOADER_H */ From 0fbf49ce574ca645c68fdbb24d35c61fdd64921b Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Wed, 13 May 2015 17:14:10 -0500 Subject: [PATCH 125/834] egl/haiku: Drop extern "C". 
No longer needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-⁠by: Brian Paul --- src/egl/drivers/haiku/egl_haiku.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index 4cf2ccb9db8..4d9888ded2b 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -27,7 +27,6 @@ #include #include -extern "C" { #include "loader.h" #include "eglconfig.h" #include "eglcontext.h" @@ -38,7 +37,6 @@ extern "C" { #include "eglsurface.h" #include "eglimage.h" #include "egltypedefs.h" -} #include #include From fcc7d6323bbea489219225f467d59192d538e95f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 12 May 2015 14:46:50 -0400 Subject: [PATCH 126/834] freedreno: enable a306 Whitelist adreno 306 (as found in msm8916/apq8016). Works pretty much out of the box, although the smaller GMEM size requires more tiles to fit 1920x1080, so bump up the max # of tiles as well. Since it is just whitelist + trivial change, it makes sense to land on all the active release branches. Note that a305c ends up with gpu-id "306", hence a306 ends up with gpu-id of "307". Apparently that is what happens when you let the marketing dept name things. 
Cc: "10.4" and "10.5" and "10.6" Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/freedreno_context.h | 2 +- src/gallium/drivers/freedreno/freedreno_screen.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 2c816b4b1f6..e420f1e5bd9 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -297,7 +297,7 @@ struct fd_context { */ struct fd_gmem_stateobj gmem; struct fd_vsc_pipe pipe[8]; - struct fd_tile tile[64]; + struct fd_tile tile[256]; /* which state objects need to be re-emit'd: */ enum { diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index f81ec80e045..c596d03b084 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -520,6 +520,7 @@ fd_screen_create(struct fd_device *dev) case 220: fd2_screen_init(pscreen); break; + case 307: case 320: case 330: fd3_screen_init(pscreen); From 4925c35660b777ae6b33a1f87a2f74f3436c7c41 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 13 May 2015 14:36:03 -0400 Subject: [PATCH 127/834] freedreno: fix bug in tile/slot calculation This was causing corruption with hw binning on a306. Unlikely that it is a306 specific, but rather the smaller gmem size resulted in different tile configuration which was triggering the bug at certain resolutions. 
Signed-off-by: Rob Clark Cc: "10.4" and "10.5" and "10.6" --- src/gallium/drivers/freedreno/freedreno_gmem.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 11a1b62b26b..c105378ec4e 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -117,6 +117,7 @@ calculate_tiles(struct fd_context *ctx) uint32_t i, j, t, xoff, yoff; uint32_t tpp_x, tpp_y; bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)); + int tile_n[ARRAY_SIZE(ctx->pipe)]; if (has_zs) { struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); @@ -247,6 +248,7 @@ calculate_tiles(struct fd_context *ctx) /* configure tiles: */ t = 0; yoff = miny; + memset(tile_n, 0, sizeof(tile_n)); for (i = 0; i < nbins_y; i++) { uint32_t bw, bh; @@ -257,20 +259,17 @@ calculate_tiles(struct fd_context *ctx) for (j = 0; j < nbins_x; j++) { struct fd_tile *tile = &ctx->tile[t]; - uint32_t n, p; + uint32_t p; assert(t < ARRAY_SIZE(ctx->tile)); /* pipe number: */ p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x); - /* slot number: */ - n = ((i % tpp_y) * tpp_x) + (j % tpp_x); - /* clip bin width: */ bw = MIN2(bin_w, minx + width - xoff); - tile->n = n; + tile->n = tile_n[p]++; tile->p = p; tile->bin_w = bw; tile->bin_h = bh; From 209360bbb91bb10346ebc509db3d8173ea32f6b1 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 11 May 2015 23:43:44 +0100 Subject: [PATCH 128/834] egl/main: drop support for external egl drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only user (egl_gallium) is no longer around.
Signed-off-by: Emil Velikov Reviewed-by: Marek Olšák Reviewed-by: Chad Versace --- src/egl/main/egldriver.c | 364 ++------------------------------------- 1 file changed, 13 insertions(+), 351 deletions(-) diff --git a/src/egl/main/egldriver.c b/src/egl/main/egldriver.c index 6983af966b6..4cadbc75b8a 100644 --- a/src/egl/main/egldriver.c +++ b/src/egl/main/egldriver.c @@ -45,21 +45,13 @@ #include "egldriver.h" #include "egllog.h" -#if defined(_EGL_OS_UNIX) -#include -#include -#include -#include -#endif - #ifdef _EGL_BUILT_IN_DRIVER_HAIKU _EGLDriver* _eglBuiltInDriverHaiku(const char* args); #endif typedef struct _egl_module { - char *Path; + char *Name; _EGLMain_t BuiltIn; - void *Handle; _EGLDriver *Driver; } _EGLModule; @@ -79,153 +71,24 @@ const struct { { NULL, NULL } }; -/** - * Wrappers for dlopen/dlclose() - */ -#if defined(_EGL_OS_WINDOWS) - - -typedef HMODULE lib_handle; - -static HMODULE -open_library(const char *filename) -{ - return LoadLibrary(filename); -} - -static void -close_library(HMODULE lib) -{ - FreeLibrary(lib); -} - - -static const char * -library_suffix(void) -{ - return ".dll"; -} - - -#elif defined(_EGL_OS_UNIX) - - -typedef void * lib_handle; - -static void * -open_library(const char *filename) -{ - return dlopen(filename, RTLD_LAZY); -} - -static void -close_library(void *lib) -{ - dlclose(lib); -} - - -static const char * -library_suffix(void) -{ - return ".so"; -} - - -#endif - - -/** - * Open the named driver and find its bootstrap function: _eglMain(). 
- */ -static _EGLMain_t -_eglOpenLibrary(const char *driverPath, lib_handle *handle) -{ - lib_handle lib; - _EGLMain_t mainFunc = NULL; - const char *error = "unknown error"; - - assert(driverPath); - - _eglLog(_EGL_DEBUG, "dlopen(%s)", driverPath); - lib = open_library(driverPath); - -#if defined(_EGL_OS_WINDOWS) - /* XXX untested */ - if (lib) - mainFunc = (_EGLMain_t) GetProcAddress(lib, "_eglMain"); -#elif defined(_EGL_OS_UNIX) - if (lib) { - union { - _EGLMain_t func; - void *ptr; - } tmp = { NULL }; - /* direct cast gives a warning when compiled with -pedantic */ - tmp.ptr = dlsym(lib, "_eglMain"); - mainFunc = tmp.func; - if (!mainFunc) - error = dlerror(); - } - else { - error = dlerror(); - } -#endif - - if (!lib) { - _eglLog(_EGL_WARNING, "Could not open driver %s (%s)", - driverPath, error); - return NULL; - } - - if (!mainFunc) { - _eglLog(_EGL_WARNING, "_eglMain not found in %s (%s)", - driverPath, error); - if (lib) - close_library(lib); - return NULL; - } - - *handle = lib; - return mainFunc; -} - - /** * Load a module and create the driver object. */ static EGLBoolean _eglLoadModule(_EGLModule *mod) { - _EGLMain_t mainFunc; - lib_handle lib; _EGLDriver *drv; if (mod->Driver) return EGL_TRUE; - if (mod->BuiltIn) { - lib = (lib_handle) NULL; - mainFunc = mod->BuiltIn; - } - else { - mainFunc = _eglOpenLibrary(mod->Path, &lib); - if (!mainFunc) + if (!mod->BuiltIn) return EGL_FALSE; - } - drv = mainFunc(NULL); - if (!drv) { - if (lib) - close_library(lib); + drv = mod->BuiltIn(NULL); + if (!drv || !drv->Name) return EGL_FALSE; - } - if (!drv->Name) { - _eglLog(_EGL_WARNING, "Driver loaded from %s has no name", mod->Path); - drv->Name = "UNNAMED"; - } - - mod->Handle = (void *) lib; mod->Driver = drv; return EGL_TRUE; @@ -243,20 +106,11 @@ _eglUnloadModule(_EGLModule *mod) if (mod->Driver && mod->Driver->Unload) mod->Driver->Unload(mod->Driver); - /* - * XXX At this point (atexit), the module might be the last reference to - * libEGL. 
Closing the module might unmap libEGL and give problems. - */ -#if 0 - if (mod->Handle) - close_library(mod->Handle); -#endif #elif defined(_EGL_OS_WINDOWS) /* XXX Windows unloads DLLs before atexit */ #endif mod->Driver = NULL; - mod->Handle = NULL; } @@ -264,7 +118,7 @@ _eglUnloadModule(_EGLModule *mod) * Add a module to the module array. */ static _EGLModule * -_eglAddModule(const char *path) +_eglAddModule(const char *name) { _EGLModule *mod; EGLint i; @@ -278,22 +132,22 @@ _eglAddModule(const char *path) /* find duplicates */ for (i = 0; i < _eglModules->Size; i++) { mod = _eglModules->Elements[i]; - if (strcmp(mod->Path, path) == 0) + if (strcmp(mod->Name, name) == 0) return mod; } /* allocate a new one */ mod = calloc(1, sizeof(*mod)); if (mod) { - mod->Path = _eglstrdup(path); - if (!mod->Path) { + mod->Name = _eglstrdup(name); + if (!mod->Name) { free(mod); mod = NULL; } } if (mod) { _eglAppendArray(_eglModules, (void *) mod); - _eglLog(_EGL_DEBUG, "added %s to module array", mod->Path); + _eglLog(_EGL_DEBUG, "added %s to module array", mod->Name); } return mod; @@ -309,154 +163,11 @@ _eglFreeModule(void *module) _EGLModule *mod = (_EGLModule *) module; _eglUnloadModule(mod); - free(mod->Path); + free(mod->Name); free(mod); } -/** - * A loader function for use with _eglPreloadForEach. The loader data is the - * filename of the driver. This function stops on the first valid driver. 
- */ -static EGLBoolean -_eglLoaderFile(const char *dir, size_t len, void *loader_data) -{ - char path[1024]; - const char *filename = (const char *) loader_data; - size_t flen = strlen(filename); - - /* make a full path */ - if (len + flen + 2 > sizeof(path)) - return EGL_TRUE; - if (len) { - memcpy(path, dir, len); - path[len++] = '/'; - } - memcpy(path + len, filename, flen); - len += flen; - path[len] = '\0'; - - if (library_suffix()) { - const char *suffix = library_suffix(); - size_t slen = strlen(suffix); - const char *p; - EGLBoolean need_suffix; - - p = filename + flen - slen; - need_suffix = (p < filename || strcmp(p, suffix) != 0); - if (need_suffix) { - /* overflow */ - if (len + slen + 1 > sizeof(path)) - return EGL_TRUE; - strcpy(path + len, suffix); - } - } - -#if defined(_EGL_OS_UNIX) - /* check if the file exists */ - if (access(path, F_OK)) - return EGL_TRUE; -#endif - - _eglAddModule(path); - - return EGL_TRUE; -} - - -/** - * Run the callback function on each driver directory. - * - * The process may end prematurely if the callback function returns false. - */ -static void -_eglPreloadForEach(const char *search_path, - EGLBoolean (*loader)(const char *, size_t, void *), - void *loader_data) -{ - const char *cur, *next; - size_t len; - - cur = search_path; - while (cur) { - next = strchr(cur, ':'); - len = (next) ? next - cur : strlen(cur); - - if (!loader(cur, len, loader_data)) - break; - - cur = (next) ? next + 1 : NULL; - } -} - - -/** - * Return a list of colon-separated driver directories. 
- */ -static const char * -_eglGetSearchPath(void) -{ - static char search_path[1024]; - -#if defined(_EGL_OS_UNIX) || defined(_EGL_OS_WINDOWS) - if (search_path[0] == '\0') { - char *buf = search_path; - size_t len = sizeof(search_path); - EGLBoolean use_env; - char dir_sep; - int ret; - -#if defined(_EGL_OS_UNIX) - use_env = (geteuid() == getuid() && getegid() == getgid()); - dir_sep = '/'; -#else - use_env = EGL_TRUE; - dir_sep = '\\'; -#endif - - if (use_env) { - char *p; - - /* extract the dirname from EGL_DRIVER */ - p = getenv("EGL_DRIVER"); - if (p && strchr(p, dir_sep)) { - ret = _eglsnprintf(buf, len, "%s", p); - if (ret > 0 && ret < len) { - p = strrchr(buf, dir_sep); - *p++ = ':'; - - len -= p - buf; - buf = p; - } - } - - /* append EGL_DRIVERS_PATH */ - p = getenv("EGL_DRIVERS_PATH"); - if (p) { - ret = _eglsnprintf(buf, len, "%s:", p); - if (ret > 0 && ret < len) { - buf += ret; - len -= ret; - } - } - } - else { - _eglLog(_EGL_DEBUG, - "ignore EGL_DRIVERS_PATH for setuid/setgid binaries"); - } - - ret = _eglsnprintf(buf, len, "%s", _EGL_DRIVER_SEARCH_DIR); - if (ret < 0 || ret >= len) - search_path[0] = '\0'; - - _eglLog(_EGL_DEBUG, "EGL search path is %s", search_path); - } -#endif /* defined(_EGL_OS_UNIX) || defined(_EGL_OS_WINDOWS) */ - - return search_path; -} - - /** * Add the user driver to the module array. * @@ -465,42 +176,15 @@ _eglGetSearchPath(void) static EGLBoolean _eglAddUserDriver(void) { - const char *search_path = _eglGetSearchPath(); char *env; - size_t name_len = 0; env = getenv("EGL_DRIVER"); -#if defined(_EGL_OS_UNIX) - if (env && strchr(env, '/')) { - search_path = ""; - if ((geteuid() != getuid() || getegid() != getgid())) { - _eglLog(_EGL_DEBUG, - "ignore EGL_DRIVER for setuid/setgid binaries"); - env = NULL; - } - } - else if (env) { - char *suffix = strchr(env, '.'); - name_len = (suffix) ? 
suffix - env : strlen(env); } -#else - if (env) - name_len = strlen(env); -#endif /* _EGL_OS_UNIX */ - - /* - * Try built-in drivers first if we know the driver name. This makes sure - * we do not load the outdated external driver that is still on the - * filesystem. - */ - if (name_len) { - _EGLModule *mod; + if (env) { EGLint i; for (i = 0; _eglBuiltInDrivers[i].name; i++) { - if (strlen(_eglBuiltInDrivers[i].name) == name_len && - !strncmp(_eglBuiltInDrivers[i].name, env, name_len)) { - mod = _eglAddModule(env); + if (!strcmp(_eglBuiltInDrivers[i].name, env)) { + _EGLModule *mod = _eglAddModule(env); if (mod) mod->BuiltIn = _eglBuiltInDrivers[i].main; @@ -509,13 +193,6 @@ _eglAddUserDriver(void) } } - /* otherwise, treat env as a path */ - if (env) { - _eglPreloadForEach(search_path, _eglLoaderFile, (void *) env); - - return EGL_TRUE; - } - return EGL_FALSE; } @@ -683,18 +360,3 @@ _eglUnloadDrivers(void) _eglModules = NULL; } } - - -/** - * Invoke a callback function on each EGL search path. - * - * The first argument of the callback function is the name of the search path. - * The second argument is the length of the name. - */ -void -_eglSearchPathForEach(EGLBoolean (*callback)(const char *, size_t, void *), - void *callback_data) -{ - const char *search_path = _eglGetSearchPath(); - _eglPreloadForEach(search_path, callback, callback_data); -} From 1fac38ee3238d5e4429f4a7250e160bc9db2c9f4 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 11 May 2015 23:43:45 +0100 Subject: [PATCH 129/834] egl/main: cleanup function prototypes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup the function prototypes which were part of the previous EGL drivers. 
Signed-off-by: Emil Velikov Reviewed-by: Marek Olšák Reviewed-by: Chad Versace --- src/egl/main/egldriver.c | 4 ---- src/egl/main/egldriver.h | 12 ++---------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/egl/main/egldriver.c b/src/egl/main/egldriver.c index 4cadbc75b8a..6ef79d96502 100644 --- a/src/egl/main/egldriver.c +++ b/src/egl/main/egldriver.c @@ -45,10 +45,6 @@ #include "egldriver.h" #include "egllog.h" -#ifdef _EGL_BUILT_IN_DRIVER_HAIKU -_EGLDriver* _eglBuiltInDriverHaiku(const char* args); -#endif - typedef struct _egl_module { char *Name; _EGLMain_t BuiltIn; diff --git a/src/egl/main/egldriver.h b/src/egl/main/egldriver.h index cd95cd305f8..1cf6628446b 100644 --- a/src/egl/main/egldriver.h +++ b/src/egl/main/egldriver.h @@ -91,20 +91,12 @@ struct _egl_driver }; -extern _EGLDriver * -_eglBuiltInDriverGALLIUM(const char *args); - - extern _EGLDriver * _eglBuiltInDriverDRI2(const char *args); -extern _EGLDriver * -_eglBuiltInDriverGLX(const char *args); - - -extern _EGLDriver * -_eglMain(const char *args); +extern _EGLDriver* +_eglBuiltInDriverHaiku(const char* args); extern _EGLDriver * From 97909881234dc32fdbe2baa889997af540c4464e Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 11 May 2015 23:43:46 +0100 Subject: [PATCH 130/834] egl/main: Update README.txt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver search/load is not done at eglGetDisplay (or eglOpenDisplay as the readme called it) time, but during eglInitialize(). Drop _eglMain (available only for external drivers) reference. Mention we use function(s), specific to the built-in driver(s). 
Signed-off-by: Emil Velikov Reviewed-by: Marek Olšák Reviewed-by: Chad Versace --- src/egl/main/README.txt | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/egl/main/README.txt b/src/egl/main/README.txt index b3d253dd133..1af99599729 100644 --- a/src/egl/main/README.txt +++ b/src/egl/main/README.txt @@ -16,10 +16,10 @@ The EGL code here basically consists of two things: Bootstrapping: -When the apps calls eglOpenDisplay() a device driver is selected and loaded -(look for dlsym() or LoadLibrary() in egldriver.c). +When the apps calls eglInitialize() a device driver is selected and loaded +(look for _eglAddDrivers() and _eglLoadModule() in egldriver.c). -The driver's _eglMain() function is then called. This driver function +The built-in driver's entry point function is then called. This driver function allocates, initializes and returns a new _EGLDriver object (usually a subclass of that type). @@ -30,10 +30,9 @@ driver->API.Initialize and driver->API.Terminate _must_ be implemented with driver-specific code (no default/fallback function is possible). -A bit later, the app will call eglInitialize(). This will get routed -to the driver->API.Initialize() function. Any additional driver -initialization that wasn't done in _eglMain() should be done at this -point. Typically, this will involve setting up visual configs, etc. +Shortly after, the driver->API.Initialize() function is executed. Any additional +driver initialization that wasn't done in the driver entry point should be +done at this point. Typically, this will involve setting up visual configs, etc. 
From f9bf9133ccc3764dc38f6359a26004e12b888af3 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 11 May 2015 23:43:47 +0100 Subject: [PATCH 131/834] egl: fix the EGL_MESA_image_dma_buf_export header declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to other EGL extensions - guard the function prototypes by EGL_EGLEXT_PROTOTYPES as the libEGL library does (should) not provide the symbols statically. Instead users should call eglGetProcAddress, which returns the function pointer. The latter of which was missing the type declaration (typedef). Cc: Dave Airlie Cc: Marc-André Lureau Signed-off-by: Emil Velikov Reviewed-by: Marek Olšák Reviewed-by: Chad Versace --- include/EGL/eglmesaext.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/EGL/eglmesaext.h b/include/EGL/eglmesaext.h index 27cf7ebe7e7..40a60ec35ed 100644 --- a/include/EGL/eglmesaext.h +++ b/include/EGL/eglmesaext.h @@ -127,9 +127,15 @@ typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOK) (EGLDisplay dpy, EG #if KHRONOS_SUPPORT_INT64 #ifndef EGL_MESA_image_dma_buf_export #define EGL_MESA_image_dma_buf_export 1 +#ifdef EGL_EGLEXT_PROTOTYPES EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageQueryMESA (EGLDisplay dpy, EGLImageKHR image, EGLint *fourcc, EGLint *nplanes, EGLuint64KHR *modifiers); EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageMESA (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets); #endif +#endif + +typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEQUERYMESA) (EGLDisplay dpy, EGLImageKHR image, EGLint *fourcc, EGLint *nplanes, EGLuint64KHR *modifiers); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEMESA) (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets); + #endif #ifdef __cplusplus } From e3cc5ad49d2f3f08c29d0b64d1c0c52560789d93 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 11 May 2015 23:43:48 +0100 Subject: [PATCH 132/834] 
egl/main: expose only core EGL functions statically MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EGL 1.3, 1.4 and 1.5 spec (as quoted below) explicitly mentions that providing static symbols for functions provided by EGL extensions is not portable. Considering that relatively recently we've seen a non-mesa desktop EGL implementation, the fact that we opt for such behaviour has gone unnoticed. From the EGL 1.5 specification: For functions that are queryable with eglGetProcAddress, implementations may choose to also export those functions statically from the object libraries implementing those functions. However, portable clients cannot rely on this behavior. To encourage devs against writing such non-portable code, let's hide the symbols similar to the official binary driver from NVIDIA. v2: Quote the EGL 1.5 spec, as suggested by Chad. Cc: Brian Paul Cc: Chad Versace Cc: Daniel Kurtz Signed-off-by: Emil Velikov Reviewed-by: Marek Olšák Reviewed-by: Chad Versace --- src/egl/main/eglapi.c | 265 ++++++++++++++++++------------------- src/egl/main/egltypedefs.h | 2 - 2 files changed, 132 insertions(+), 135 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 3f02c5c5539..9e6cb2fb8d7 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -270,7 +270,7 @@ eglGetDisplay(EGLNativeDisplayType nativeDisplay) return _eglGetDisplayHandle(dpy); } -EGLDisplay EGLAPIENTRY +static EGLDisplay EGLAPIENTRY eglGetPlatformDisplayEXT(EGLenum platform, void *native_display, const EGLint *attrib_list) { @@ -697,7 +697,7 @@ eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config, } -EGLSurface EGLAPIENTRY +static EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurfaceEXT(EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list) @@ -750,7 +750,7 @@ eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config, attrib_list); } -EGLSurface EGLAPIENTRY +static EGLSurface EGLAPIENTRY 
eglCreatePlatformPixmapSurfaceEXT(EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list) @@ -921,7 +921,7 @@ eglSwapBuffers(EGLDisplay dpy, EGLSurface surface) #ifdef EGL_EXT_swap_buffers_with_damage -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageEXT(EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects) { @@ -1108,117 +1108,9 @@ eglGetError(void) } -__eglMustCastToProperFunctionPointerType EGLAPIENTRY -eglGetProcAddress(const char *procname) -{ - static const struct { - const char *name; - _EGLProc function; - } egl_functions[] = { - /* core functions should not be queryable, but, well... */ -#ifdef _EGL_GET_CORE_ADDRESSES - /* alphabetical order */ - { "eglBindAPI", (_EGLProc) eglBindAPI }, - { "eglBindTexImage", (_EGLProc) eglBindTexImage }, - { "eglChooseConfig", (_EGLProc) eglChooseConfig }, - { "eglCopyBuffers", (_EGLProc) eglCopyBuffers }, - { "eglCreateContext", (_EGLProc) eglCreateContext }, - { "eglCreatePbufferFromClientBuffer", (_EGLProc) eglCreatePbufferFromClientBuffer }, - { "eglCreatePbufferSurface", (_EGLProc) eglCreatePbufferSurface }, - { "eglCreatePixmapSurface", (_EGLProc) eglCreatePixmapSurface }, - { "eglCreateWindowSurface", (_EGLProc) eglCreateWindowSurface }, - { "eglDestroyContext", (_EGLProc) eglDestroyContext }, - { "eglDestroySurface", (_EGLProc) eglDestroySurface }, - { "eglGetConfigAttrib", (_EGLProc) eglGetConfigAttrib }, - { "eglGetConfigs", (_EGLProc) eglGetConfigs }, - { "eglGetCurrentContext", (_EGLProc) eglGetCurrentContext }, - { "eglGetCurrentDisplay", (_EGLProc) eglGetCurrentDisplay }, - { "eglGetCurrentSurface", (_EGLProc) eglGetCurrentSurface }, - { "eglGetDisplay", (_EGLProc) eglGetDisplay }, - { "eglGetError", (_EGLProc) eglGetError }, - { "eglGetProcAddress", (_EGLProc) eglGetProcAddress }, - { "eglInitialize", (_EGLProc) eglInitialize }, - { "eglMakeCurrent", (_EGLProc) eglMakeCurrent }, - { "eglQueryAPI", (_EGLProc) eglQueryAPI }, - { 
"eglQueryContext", (_EGLProc) eglQueryContext }, - { "eglQueryString", (_EGLProc) eglQueryString }, - { "eglQuerySurface", (_EGLProc) eglQuerySurface }, - { "eglReleaseTexImage", (_EGLProc) eglReleaseTexImage }, - { "eglReleaseThread", (_EGLProc) eglReleaseThread }, - { "eglSurfaceAttrib", (_EGLProc) eglSurfaceAttrib }, - { "eglSwapBuffers", (_EGLProc) eglSwapBuffers }, - { "eglSwapInterval", (_EGLProc) eglSwapInterval }, - { "eglTerminate", (_EGLProc) eglTerminate }, - { "eglWaitClient", (_EGLProc) eglWaitClient }, - { "eglWaitGL", (_EGLProc) eglWaitGL }, - { "eglWaitNative", (_EGLProc) eglWaitNative }, -#endif /* _EGL_GET_CORE_ADDRESSES */ -#ifdef EGL_MESA_drm_display - { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA }, -#endif - { "eglCreateImageKHR", (_EGLProc) eglCreateImageKHR }, - { "eglDestroyImageKHR", (_EGLProc) eglDestroyImageKHR }, - { "eglCreateSyncKHR", (_EGLProc) eglCreateSyncKHR }, - { "eglCreateSync64KHR", (_EGLProc) eglCreateSync64KHR }, - { "eglDestroySyncKHR", (_EGLProc) eglDestroySyncKHR }, - { "eglClientWaitSyncKHR", (_EGLProc) eglClientWaitSyncKHR }, - { "eglWaitSyncKHR", (_EGLProc) eglWaitSyncKHR }, - { "eglSignalSyncKHR", (_EGLProc) eglSignalSyncKHR }, - { "eglGetSyncAttribKHR", (_EGLProc) eglGetSyncAttribKHR }, -#ifdef EGL_NOK_swap_region - { "eglSwapBuffersRegionNOK", (_EGLProc) eglSwapBuffersRegionNOK }, -#endif -#ifdef EGL_MESA_drm_image - { "eglCreateDRMImageMESA", (_EGLProc) eglCreateDRMImageMESA }, - { "eglExportDRMImageMESA", (_EGLProc) eglExportDRMImageMESA }, -#endif -#ifdef EGL_WL_bind_wayland_display - { "eglBindWaylandDisplayWL", (_EGLProc) eglBindWaylandDisplayWL }, - { "eglUnbindWaylandDisplayWL", (_EGLProc) eglUnbindWaylandDisplayWL }, - { "eglQueryWaylandBufferWL", (_EGLProc) eglQueryWaylandBufferWL }, -#endif -#ifdef EGL_WL_create_wayland_buffer_from_image - { "eglCreateWaylandBufferFromImageWL", (_EGLProc) eglCreateWaylandBufferFromImageWL }, -#endif - { "eglPostSubBufferNV", (_EGLProc) eglPostSubBufferNV }, 
-#ifdef EGL_EXT_swap_buffers_with_damage - { "eglSwapBuffersWithDamageEXT", (_EGLProc) eglSwapBuffersWithDamageEXT }, -#endif - { "eglGetPlatformDisplayEXT", (_EGLProc) eglGetPlatformDisplayEXT }, - { "eglCreatePlatformWindowSurfaceEXT", (_EGLProc) eglCreatePlatformWindowSurfaceEXT }, - { "eglCreatePlatformPixmapSurfaceEXT", (_EGLProc) eglCreatePlatformPixmapSurfaceEXT }, - { "eglGetSyncValuesCHROMIUM", (_EGLProc) eglGetSyncValuesCHROMIUM }, -#ifdef EGL_MESA_dma_buf_image_export - { "eglExportDMABUFImageQueryMESA", (_EGLProc) eglExportDMABUFImageQueryMESA }, - { "eglExportDMABUFImageMESA", (_EGLProc) eglExportDMABUFImageMESA }, -#endif - { NULL, NULL } - }; - EGLint i; - _EGLProc ret; - - if (!procname) - RETURN_EGL_SUCCESS(NULL, NULL); - - ret = NULL; - if (strncmp(procname, "egl", 3) == 0) { - for (i = 0; egl_functions[i].name; i++) { - if (strcmp(egl_functions[i].name, procname) == 0) { - ret = egl_functions[i].function; - break; - } - } - } - if (!ret) - ret = _eglGetDriverProc(procname); - - RETURN_EGL_SUCCESS(NULL, ret); -} - - #ifdef EGL_MESA_drm_display -EGLDisplay EGLAPIENTRY +static EGLDisplay EGLAPIENTRY eglGetDRMDisplayMESA(int fd) { _EGLDisplay *dpy = _eglFindDisplay(_EGL_PLATFORM_DRM, (void *) (intptr_t) fd); @@ -1329,7 +1221,7 @@ eglReleaseThread(void) } -EGLImageKHR EGLAPIENTRY +static EGLImageKHR EGLAPIENTRY eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attr_list) { @@ -1358,7 +1250,7 @@ eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target, } -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglDestroyImageKHR(EGLDisplay dpy, EGLImageKHR image) { _EGLDisplay *disp = _eglLockDisplay(dpy); @@ -1423,21 +1315,21 @@ _eglCreateSync(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list, } -EGLSyncKHR EGLAPIENTRY +static EGLSyncKHR EGLAPIENTRY eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list) { return _eglCreateSync(dpy, type, attrib_list, NULL, EGL_FALSE); 
} -EGLSyncKHR EGLAPIENTRY +static EGLSyncKHR EGLAPIENTRY eglCreateSync64KHR(EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list) { return _eglCreateSync(dpy, type, NULL, attrib_list, EGL_TRUE); } -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglDestroySyncKHR(EGLDisplay dpy, EGLSyncKHR sync) { _EGLDisplay *disp = _eglLockDisplay(dpy); @@ -1456,7 +1348,7 @@ eglDestroySyncKHR(EGLDisplay dpy, EGLSyncKHR sync) } -EGLint EGLAPIENTRY +static EGLint EGLAPIENTRY eglClientWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout) { _EGLDisplay *disp = _eglLockDisplay(dpy); @@ -1477,7 +1369,7 @@ eglClientWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR t } -EGLint EGLAPIENTRY +static EGLint EGLAPIENTRY eglWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags) { _EGLDisplay *disp = _eglLockDisplay(dpy); @@ -1503,7 +1395,7 @@ eglWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags) } -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglSignalSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode) { _EGLDisplay *disp = _eglLockDisplay(dpy); @@ -1519,7 +1411,7 @@ eglSignalSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode) } -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value) { _EGLDisplay *disp = _eglLockDisplay(dpy); @@ -1538,7 +1430,7 @@ eglGetSyncAttribKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *v #ifdef EGL_NOK_swap_region -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects) { @@ -1568,7 +1460,7 @@ eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface, #ifdef EGL_MESA_drm_image -EGLImageKHR EGLAPIENTRY +static EGLImageKHR EGLAPIENTRY eglCreateDRMImageMESA(EGLDisplay dpy, const EGLint *attr_list) { _EGLDisplay *disp = _eglLockDisplay(dpy); @@ -1586,7 +1478,7 @@ 
eglCreateDRMImageMESA(EGLDisplay dpy, const EGLint *attr_list) RETURN_EGL_EVAL(disp, ret); } -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglExportDRMImageMESA(EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride) { @@ -1611,7 +1503,7 @@ eglExportDRMImageMESA(EGLDisplay dpy, EGLImageKHR image, #ifdef EGL_WL_bind_wayland_display struct wl_display; -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglBindWaylandDisplayWL(EGLDisplay dpy, struct wl_display *display) { _EGLDisplay *disp = _eglLockDisplay(dpy); @@ -1629,7 +1521,7 @@ eglBindWaylandDisplayWL(EGLDisplay dpy, struct wl_display *display) RETURN_EGL_EVAL(disp, ret); } -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglUnbindWaylandDisplayWL(EGLDisplay dpy, struct wl_display *display) { _EGLDisplay *disp = _eglLockDisplay(dpy); @@ -1647,7 +1539,7 @@ eglUnbindWaylandDisplayWL(EGLDisplay dpy, struct wl_display *display) RETURN_EGL_EVAL(disp, ret); } -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglQueryWaylandBufferWL(EGLDisplay dpy, struct wl_resource *buffer, EGLint attribute, EGLint *value) { @@ -1668,7 +1560,7 @@ eglQueryWaylandBufferWL(EGLDisplay dpy, struct wl_resource *buffer, #endif #ifdef EGL_WL_create_wayland_buffer_from_image -struct wl_buffer * EGLAPIENTRY +static struct wl_buffer * EGLAPIENTRY eglCreateWaylandBufferFromImageWL(EGLDisplay dpy, EGLImageKHR image) { _EGLDisplay *disp = _eglLockDisplay(dpy); @@ -1690,7 +1582,7 @@ eglCreateWaylandBufferFromImageWL(EGLDisplay dpy, EGLImageKHR image) } #endif -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglPostSubBufferNV(EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height) { @@ -1709,7 +1601,7 @@ eglPostSubBufferNV(EGLDisplay dpy, EGLSurface surface, RETURN_EGL_EVAL(disp, ret); } -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglGetSyncValuesCHROMIUM(EGLDisplay display, EGLSurface surface, EGLuint64KHR *ust, EGLuint64KHR *msc, EGLuint64KHR *sbc) @@ 
-1732,7 +1624,7 @@ eglGetSyncValuesCHROMIUM(EGLDisplay display, EGLSurface surface, } #ifdef EGL_MESA_image_dma_buf_export -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglExportDMABUFImageQueryMESA(EGLDisplay dpy, EGLImageKHR image, EGLint *fourcc, EGLint *nplanes, EGLuint64KHR *modifiers) @@ -1754,7 +1646,7 @@ eglExportDMABUFImageQueryMESA(EGLDisplay dpy, EGLImageKHR image, RETURN_EGL_EVAL(disp, ret); } -EGLBoolean EGLAPIENTRY +static EGLBoolean EGLAPIENTRY eglExportDMABUFImageMESA(EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets) { @@ -1774,3 +1666,110 @@ eglExportDMABUFImageMESA(EGLDisplay dpy, EGLImageKHR image, RETURN_EGL_EVAL(disp, ret); } #endif + +__eglMustCastToProperFunctionPointerType EGLAPIENTRY +eglGetProcAddress(const char *procname) +{ + static const struct { + const char *name; + _EGLProc function; + } egl_functions[] = { + /* core functions should not be queryable, but, well... */ +#ifdef _EGL_GET_CORE_ADDRESSES + /* alphabetical order */ + { "eglBindAPI", (_EGLProc) eglBindAPI }, + { "eglBindTexImage", (_EGLProc) eglBindTexImage }, + { "eglChooseConfig", (_EGLProc) eglChooseConfig }, + { "eglCopyBuffers", (_EGLProc) eglCopyBuffers }, + { "eglCreateContext", (_EGLProc) eglCreateContext }, + { "eglCreatePbufferFromClientBuffer", (_EGLProc) eglCreatePbufferFromClientBuffer }, + { "eglCreatePbufferSurface", (_EGLProc) eglCreatePbufferSurface }, + { "eglCreatePixmapSurface", (_EGLProc) eglCreatePixmapSurface }, + { "eglCreateWindowSurface", (_EGLProc) eglCreateWindowSurface }, + { "eglDestroyContext", (_EGLProc) eglDestroyContext }, + { "eglDestroySurface", (_EGLProc) eglDestroySurface }, + { "eglGetConfigAttrib", (_EGLProc) eglGetConfigAttrib }, + { "eglGetConfigs", (_EGLProc) eglGetConfigs }, + { "eglGetCurrentContext", (_EGLProc) eglGetCurrentContext }, + { "eglGetCurrentDisplay", (_EGLProc) eglGetCurrentDisplay }, + { "eglGetCurrentSurface", (_EGLProc) eglGetCurrentSurface }, + { "eglGetDisplay", (_EGLProc) 
eglGetDisplay }, + { "eglGetError", (_EGLProc) eglGetError }, + { "eglGetProcAddress", (_EGLProc) eglGetProcAddress }, + { "eglInitialize", (_EGLProc) eglInitialize }, + { "eglMakeCurrent", (_EGLProc) eglMakeCurrent }, + { "eglQueryAPI", (_EGLProc) eglQueryAPI }, + { "eglQueryContext", (_EGLProc) eglQueryContext }, + { "eglQueryString", (_EGLProc) eglQueryString }, + { "eglQuerySurface", (_EGLProc) eglQuerySurface }, + { "eglReleaseTexImage", (_EGLProc) eglReleaseTexImage }, + { "eglReleaseThread", (_EGLProc) eglReleaseThread }, + { "eglSurfaceAttrib", (_EGLProc) eglSurfaceAttrib }, + { "eglSwapBuffers", (_EGLProc) eglSwapBuffers }, + { "eglSwapInterval", (_EGLProc) eglSwapInterval }, + { "eglTerminate", (_EGLProc) eglTerminate }, + { "eglWaitClient", (_EGLProc) eglWaitClient }, + { "eglWaitGL", (_EGLProc) eglWaitGL }, + { "eglWaitNative", (_EGLProc) eglWaitNative }, +#endif /* _EGL_GET_CORE_ADDRESSES */ +#ifdef EGL_MESA_drm_display + { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA }, +#endif + { "eglCreateImageKHR", (_EGLProc) eglCreateImageKHR }, + { "eglDestroyImageKHR", (_EGLProc) eglDestroyImageKHR }, + { "eglCreateSyncKHR", (_EGLProc) eglCreateSyncKHR }, + { "eglCreateSync64KHR", (_EGLProc) eglCreateSync64KHR }, + { "eglDestroySyncKHR", (_EGLProc) eglDestroySyncKHR }, + { "eglClientWaitSyncKHR", (_EGLProc) eglClientWaitSyncKHR }, + { "eglWaitSyncKHR", (_EGLProc) eglWaitSyncKHR }, + { "eglSignalSyncKHR", (_EGLProc) eglSignalSyncKHR }, + { "eglGetSyncAttribKHR", (_EGLProc) eglGetSyncAttribKHR }, +#ifdef EGL_NOK_swap_region + { "eglSwapBuffersRegionNOK", (_EGLProc) eglSwapBuffersRegionNOK }, +#endif +#ifdef EGL_MESA_drm_image + { "eglCreateDRMImageMESA", (_EGLProc) eglCreateDRMImageMESA }, + { "eglExportDRMImageMESA", (_EGLProc) eglExportDRMImageMESA }, +#endif +#ifdef EGL_WL_bind_wayland_display + { "eglBindWaylandDisplayWL", (_EGLProc) eglBindWaylandDisplayWL }, + { "eglUnbindWaylandDisplayWL", (_EGLProc) eglUnbindWaylandDisplayWL }, + { 
"eglQueryWaylandBufferWL", (_EGLProc) eglQueryWaylandBufferWL }, +#endif +#ifdef EGL_WL_create_wayland_buffer_from_image + { "eglCreateWaylandBufferFromImageWL", (_EGLProc) eglCreateWaylandBufferFromImageWL }, +#endif + { "eglPostSubBufferNV", (_EGLProc) eglPostSubBufferNV }, +#ifdef EGL_EXT_swap_buffers_with_damage + { "eglSwapBuffersWithDamageEXT", (_EGLProc) eglSwapBuffersWithDamageEXT }, +#endif + { "eglGetPlatformDisplayEXT", (_EGLProc) eglGetPlatformDisplayEXT }, + { "eglCreatePlatformWindowSurfaceEXT", (_EGLProc) eglCreatePlatformWindowSurfaceEXT }, + { "eglCreatePlatformPixmapSurfaceEXT", (_EGLProc) eglCreatePlatformPixmapSurfaceEXT }, + { "eglGetSyncValuesCHROMIUM", (_EGLProc) eglGetSyncValuesCHROMIUM }, +#ifdef EGL_MESA_dma_buf_image_export + { "eglExportDMABUFImageQueryMESA", (_EGLProc) eglExportDMABUFImageQueryMESA }, + { "eglExportDMABUFImageMESA", (_EGLProc) eglExportDMABUFImageMESA }, +#endif + { NULL, NULL } + }; + EGLint i; + _EGLProc ret; + + if (!procname) + RETURN_EGL_SUCCESS(NULL, NULL); + + ret = NULL; + if (strncmp(procname, "egl", 3) == 0) { + for (i = 0; egl_functions[i].name; i++) { + if (strcmp(egl_functions[i].name, procname) == 0) { + ret = egl_functions[i].function; + break; + } + } + } + if (!ret) + ret = _eglGetDriverProc(procname); + + RETURN_EGL_SUCCESS(NULL, ret); +} diff --git a/src/egl/main/egltypedefs.h b/src/egl/main/egltypedefs.h index 82064a71fc0..7facdb47f86 100644 --- a/src/egl/main/egltypedefs.h +++ b/src/egl/main/egltypedefs.h @@ -31,8 +31,6 @@ #ifndef EGLTYPEDEFS_INCLUDED #define EGLTYPEDEFS_INCLUDED -#define EGL_EGLEXT_PROTOTYPES - #include #include From ffc94e32a38b3948fe4ae2717a3f55802eb8aae8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 11 May 2015 23:43:49 +0100 Subject: [PATCH 133/834] egl: more define fixes for EGL_MESA_image_dma_buf_export MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
s/EGL_MESA_dma_buf_image_export/EGL_MESA_image_dma_buf_export as defined by the spec Reviewed-by: Marek Olšák Reviewed-by: Chad Versace --- src/egl/main/eglapi.c | 2 +- src/egl/main/eglfallbacks.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 9e6cb2fb8d7..e4d098c1eaa 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1747,7 +1747,7 @@ eglGetProcAddress(const char *procname) { "eglCreatePlatformWindowSurfaceEXT", (_EGLProc) eglCreatePlatformWindowSurfaceEXT }, { "eglCreatePlatformPixmapSurfaceEXT", (_EGLProc) eglCreatePlatformPixmapSurfaceEXT }, { "eglGetSyncValuesCHROMIUM", (_EGLProc) eglGetSyncValuesCHROMIUM }, -#ifdef EGL_MESA_dma_buf_image_export +#ifdef EGL_MESA_image_dma_buf_export { "eglExportDMABUFImageQueryMESA", (_EGLProc) eglExportDMABUFImageQueryMESA }, { "eglExportDMABUFImageMESA", (_EGLProc) eglExportDMABUFImageMESA }, #endif diff --git a/src/egl/main/eglfallbacks.c b/src/egl/main/eglfallbacks.c index d12b8491ad1..c44ec6cc835 100644 --- a/src/egl/main/eglfallbacks.c +++ b/src/egl/main/eglfallbacks.c @@ -102,7 +102,7 @@ _eglInitDriverFallbacks(_EGLDriver *drv) drv->API.SwapBuffersRegionNOK = NULL; #endif -#ifdef EGL_MESA_dma_buf_image_export +#ifdef EGL_MESA_image_dma_buf_export drv->API.ExportDMABUFImageQueryMESA = NULL; drv->API.ExportDMABUFImageMESA = NULL; #endif From 448e01b2918c76dfff8abfbd56a606fdff8c356c Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 12 May 2015 00:30:16 +0100 Subject: [PATCH 134/834] egl/main: fix EGL_KHR_get_all_proc_addresses The extension requires that the address of the core functions should be available via eglGetProcAddress. Currently the list is guarded by _EGL_GET_CORE_ADDRESSES, which was only set for the scons (windows) build. Unconditionally enable it for all the builds (automake, android and haiku) considering that the extension is not platform specific and is always enabled. 
v2: Drop the _EGL_GET_CORE_ADDRESSES macro altogether. Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Emil Velikov --- src/egl/main/eglapi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index e4d098c1eaa..dbfad634162 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1674,8 +1674,9 @@ eglGetProcAddress(const char *procname) const char *name; _EGLProc function; } egl_functions[] = { - /* core functions should not be queryable, but, well... */ -#ifdef _EGL_GET_CORE_ADDRESSES + /* core functions queryable in the presence of + * EGL_KHR_get_all_proc_addresses or EGL 1.5 + */ /* alphabetical order */ { "eglBindAPI", (_EGLProc) eglBindAPI }, { "eglBindTexImage", (_EGLProc) eglBindTexImage }, @@ -1711,7 +1712,6 @@ eglGetProcAddress(const char *procname) { "eglWaitClient", (_EGLProc) eglWaitClient }, { "eglWaitGL", (_EGLProc) eglWaitGL }, { "eglWaitNative", (_EGLProc) eglWaitNative }, -#endif /* _EGL_GET_CORE_ADDRESSES */ #ifdef EGL_MESA_drm_display { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA }, #endif From 0c4eef6a2cad72571efb15a5fdca2726f0540d07 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 14 May 2015 18:50:22 +0000 Subject: [PATCH 135/834] egl: remove remaining EGL_MESA_copy_context skeleton With earlier commit (7a58262e58d egl: Remove skeleton implementation of EGL_MESA_screen_surface) we've removed the skeleton implementation of eglCopyContextMESA(). Just like EGL_MESA_screen_surface this extension was never implemented in mesa. 
Signed-off-by: Emil Velikov Reviewed-by: Adam Jackson --- include/EGL/eglmesaext.h | 11 ----------- src/egl/main/eglapi.c | 1 - src/egl/main/egldisplay.h | 1 - 3 files changed, 13 deletions(-) diff --git a/include/EGL/eglmesaext.h b/include/EGL/eglmesaext.h index 40a60ec35ed..87748cadbee 100644 --- a/include/EGL/eglmesaext.h +++ b/include/EGL/eglmesaext.h @@ -34,17 +34,6 @@ extern "C" { #include -#ifndef EGL_MESA_copy_context -#define EGL_MESA_copy_context 1 - -#ifdef EGL_EGLEXT_PROTOTYPES -EGLAPI EGLBoolean EGLAPIENTRY eglCopyContextMESA(EGLDisplay dpy, EGLContext source, EGLContext dest, EGLint mask); -#endif /* EGL_EGLEXT_PROTOTYPES */ - -typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOPYCONTEXTMESA) (EGLDisplay dpy, EGLContext source, EGLContext dest, EGLint mask); - -#endif /* EGL_MESA_copy_context */ - #ifndef EGL_MESA_drm_display #define EGL_MESA_drm_display 1 diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index dbfad634162..fbb14f1524a 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -341,7 +341,6 @@ _eglCreateExtensionsString(_EGLDisplay *dpy) char *exts = dpy->ExtensionsString; - _EGL_CHECK_EXTENSION(MESA_copy_context); _EGL_CHECK_EXTENSION(MESA_drm_display); _EGL_CHECK_EXTENSION(MESA_drm_image); _EGL_CHECK_EXTENSION(MESA_configless_context); diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 6862e2f51b7..bb394ec2962 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -90,7 +90,6 @@ struct _egl_resource */ struct _egl_extensions { - EGLBoolean MESA_copy_context; EGLBoolean MESA_drm_display; EGLBoolean MESA_drm_image; EGLBoolean MESA_configless_context; From 3687d752e51829b4723c9abb07ae56d2bbcda570 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 11 Mar 2015 22:41:49 -0700 Subject: [PATCH 136/834] i965/fs: Combine the fs_visitor constructors. 
For scalar GS support, we either need to add a fourth constructor which takes the GS structures, or combine the existing two and pass the shader stage. Given that they're not significantly different, I opted for the latter. v2: Remove more stuff from the .h file (Jason and Jordan). Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_cs.cpp | 6 ++- src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++- src/mesa/drivers/dri/i965/brw_fs.h | 24 ++------- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 55 +++----------------- src/mesa/drivers/dri/i965/brw_vec4.cpp | 3 +- 5 files changed, 20 insertions(+), 74 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index fc2d8576495..1f2a9d2ea67 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -90,7 +90,8 @@ brw_cs_emit(struct brw_context *brw, /* Now the main event: Visit the shader IR and generate our CS IR for it. 
*/ - fs_visitor v8(brw, mem_ctx, key, prog_data, prog, cp, 8); + fs_visitor v8(brw, mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, + &cp->Base, 8); if (!v8.run_cs()) { fail_msg = v8.fail_msg; } else if (local_workgroup_size <= 8 * brw->max_cs_threads) { @@ -98,7 +99,8 @@ brw_cs_emit(struct brw_context *brw, prog_data->simd_size = 8; } - fs_visitor v16(brw, mem_ctx, key, prog_data, prog, cp, 16); + fs_visitor v16(brw, mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, + &cp->Base, 16); if (likely(!(INTEL_DEBUG & DEBUG_NO16)) && !fail_msg && !v8.simd16_unsupported && local_workgroup_size <= 16 * brw->max_cs_threads) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 08664cf328c..b63ca23e3d8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4303,7 +4303,8 @@ brw_wm_fs_emit(struct brw_context *brw, /* Now the main event: Visit the shader IR and generate our FS IR for it. */ - fs_visitor v(brw, mem_ctx, key, prog_data, prog, fp, 8); + fs_visitor v(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, + prog, &fp->Base, 8); if (!v.run_fs()) { if (prog) { prog->LinkStatus = false; @@ -4317,7 +4318,8 @@ brw_wm_fs_emit(struct brw_context *brw, } cfg_t *simd16_cfg = NULL; - fs_visitor v2(brw, mem_ctx, key, prog_data, prog, fp, 16); + fs_visitor v2(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, + prog, &fp->Base, 16); if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) { if (!v.simd16_unsupported) { /* Try a SIMD16 compile */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 1d7de2effbd..991cff96325 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -75,30 +75,14 @@ public: fs_visitor(struct brw_context *brw, void *mem_ctx, - const struct brw_wm_prog_key *key, - struct brw_wm_prog_data *prog_data, + gl_shader_stage stage, + const void *key, + struct 
brw_stage_prog_data *prog_data, struct gl_shader_program *shader_prog, - struct gl_fragment_program *fp, - unsigned dispatch_width); - - fs_visitor(struct brw_context *brw, - void *mem_ctx, - const struct brw_vs_prog_key *key, - struct brw_vs_prog_data *prog_data, - struct gl_shader_program *shader_prog, - struct gl_vertex_program *cp, - unsigned dispatch_width); - - fs_visitor(struct brw_context *brw, - void *mem_ctx, - const struct brw_cs_prog_key *key, - struct brw_cs_prog_data *prog_data, - struct gl_shader_program *shader_prog, - struct gl_compute_program *cp, + struct gl_program *prog, unsigned dispatch_width); ~fs_visitor(); - void init(); fs_reg *variable_storage(ir_variable *var); fs_reg vgrf(const glsl_type *const type); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 78f269ef973..abaea5f4e13 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -4144,64 +4144,21 @@ fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) fs_visitor::fs_visitor(struct brw_context *brw, void *mem_ctx, - const struct brw_wm_prog_key *key, - struct brw_wm_prog_data *prog_data, + gl_shader_stage stage, + const void *key, + struct brw_stage_prog_data *prog_data, struct gl_shader_program *shader_prog, - struct gl_fragment_program *fp, + struct gl_program *prog, unsigned dispatch_width) - : backend_visitor(brw, shader_prog, &fp->Base, &prog_data->base, - MESA_SHADER_FRAGMENT), + : backend_visitor(brw, shader_prog, prog, prog_data, stage), reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)), reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), - key(key), prog_data(&prog_data->base), + key(key), prog_data(prog_data), dispatch_width(dispatch_width), promoted_constants(0) { this->mem_ctx = mem_ctx; - init(); -} -fs_visitor::fs_visitor(struct 
brw_context *brw, - void *mem_ctx, - const struct brw_vs_prog_key *key, - struct brw_vs_prog_data *prog_data, - struct gl_shader_program *shader_prog, - struct gl_vertex_program *cp, - unsigned dispatch_width) - : backend_visitor(brw, shader_prog, &cp->Base, &prog_data->base.base, - MESA_SHADER_VERTEX), - reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)), - reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), - reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), - key(key), prog_data(&prog_data->base.base), - dispatch_width(dispatch_width), promoted_constants(0) -{ - this->mem_ctx = mem_ctx; - init(); -} - -fs_visitor::fs_visitor(struct brw_context *brw, - void *mem_ctx, - const struct brw_cs_prog_key *key, - struct brw_cs_prog_data *prog_data, - struct gl_shader_program *shader_prog, - struct gl_compute_program *cp, - unsigned dispatch_width) - : backend_visitor(brw, shader_prog, &cp->Base, &prog_data->base, - MESA_SHADER_COMPUTE), - reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)), - reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), - reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), - key(key), prog_data(&prog_data->base), - dispatch_width(dispatch_width), promoted_constants(0) -{ - this->mem_ctx = mem_ctx; - init(); -} - -void -fs_visitor::init() -{ switch (stage) { case MESA_SHADER_FRAGMENT: key_tex = &((const brw_wm_prog_key *) key)->tex; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 2841d983ad5..e9681b73343 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1895,7 +1895,8 @@ brw_vs_emit(struct brw_context *brw, } if (brw->scalar_vs && (prog || use_nir)) { - fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8); + fs_visitor v(brw, mem_ctx, MESA_SHADER_VERTEX, &c->key, + &prog_data->base.base, prog, 
&c->vp->program.Base, 8); if (!v.run_vs()) { if (prog) { prog->LinkStatus = false; From 40a8b2f92a8aef25199324046114023c4ed3d772 Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Thu, 14 May 2015 17:19:44 -0500 Subject: [PATCH 137/834] gallium/aux: Add needed extern "C" wrappers Reviewed-by: Brian Paul --- src/gallium/auxiliary/postprocess/postprocess.h | 9 +++++++++ src/gallium/auxiliary/util/u_tests.h | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/src/gallium/auxiliary/postprocess/postprocess.h b/src/gallium/auxiliary/postprocess/postprocess.h index c72f2c4b407..9b9f981a5ff 100644 --- a/src/gallium/auxiliary/postprocess/postprocess.h +++ b/src/gallium/auxiliary/postprocess/postprocess.h @@ -30,6 +30,10 @@ #include "pipe/p_state.h" +#ifdef __cplusplus +extern "C" { +#endif + struct cso_context; struct pp_queue_t; /* Forward definition */ @@ -85,4 +89,9 @@ void pp_celshade_free(struct pp_queue_t *, unsigned int); void pp_nocolor_free(struct pp_queue_t *, unsigned int); void pp_jimenezmlaa_free(struct pp_queue_t *, unsigned int); + +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/util/u_tests.h b/src/gallium/auxiliary/util/u_tests.h index 49ae54f876b..106b0a0a938 100644 --- a/src/gallium/auxiliary/util/u_tests.h +++ b/src/gallium/auxiliary/util/u_tests.h @@ -30,8 +30,16 @@ #include "pipe/p_compiler.h" +#ifdef __cplusplus +extern "C" { +#endif + struct pipe_screen; void util_run_tests(struct pipe_screen *screen); +#ifdef __cplusplus +} +#endif + #endif From 624b38add99c21e2deb2029b157e92c3f0e7cdd4 Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Thu, 14 May 2015 17:20:17 -0500 Subject: [PATCH 138/834] gallium/drivers: Add extern "C" wrappers to public entry Reviewed-by: Brian Paul --- src/gallium/drivers/llvmpipe/lp_public.h | 8 ++++++++ src/gallium/drivers/rbug/rbug_public.h | 8 ++++++++ src/gallium/drivers/softpipe/sp_public.h | 8 ++++++++ 3 files changed, 24 insertions(+) diff --git 
a/src/gallium/drivers/llvmpipe/lp_public.h b/src/gallium/drivers/llvmpipe/lp_public.h index ec6b660b48e..27ab1baefbb 100644 --- a/src/gallium/drivers/llvmpipe/lp_public.h +++ b/src/gallium/drivers/llvmpipe/lp_public.h @@ -1,10 +1,18 @@ #ifndef LP_PUBLIC_H #define LP_PUBLIC_H +#ifdef __cplusplus +extern "C" { +#endif + struct pipe_screen; struct sw_winsys; struct pipe_screen * llvmpipe_create_screen(struct sw_winsys *winsys); +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/drivers/rbug/rbug_public.h b/src/gallium/drivers/rbug/rbug_public.h index b66740b49cd..83f9c94e31f 100644 --- a/src/gallium/drivers/rbug/rbug_public.h +++ b/src/gallium/drivers/rbug/rbug_public.h @@ -28,6 +28,10 @@ #ifndef RBUG_PUBLIC_H #define RBUG_PUBLIC_H +#ifdef __cplusplus +extern "C" { +#endif + struct pipe_screen; struct pipe_context; @@ -37,4 +41,8 @@ rbug_screen_create(struct pipe_screen *screen); boolean rbug_enabled(void); +#ifdef __cplusplus +} +#endif + #endif /* RBUG_PUBLIC_H */ diff --git a/src/gallium/drivers/softpipe/sp_public.h b/src/gallium/drivers/softpipe/sp_public.h index 62d0903d87a..88a9b5e6643 100644 --- a/src/gallium/drivers/softpipe/sp_public.h +++ b/src/gallium/drivers/softpipe/sp_public.h @@ -1,10 +1,18 @@ #ifndef SP_PUBLIC_H #define SP_PUBLIC_H +#ifdef __cplusplus +extern "C" { +#endif + struct pipe_screen; struct sw_winsys; struct pipe_screen * softpipe_create_screen(struct sw_winsys *winsys); +#ifdef __cplusplus +} +#endif + #endif From 73aef2d1d8a0fe417df1ab4a35029be74891ee37 Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Thu, 14 May 2015 17:27:14 -0500 Subject: [PATCH 139/834] winsys/hgl: Add needed extern "C" to hgl winsys Reviewed-by: Brian Paul --- src/gallium/winsys/sw/hgl/hgl_sw_winsys.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/gallium/winsys/sw/hgl/hgl_sw_winsys.h b/src/gallium/winsys/sw/hgl/hgl_sw_winsys.h index bdcddfb4f2b..a81f890826e 100644 --- a/src/gallium/winsys/sw/hgl/hgl_sw_winsys.h +++ 
b/src/gallium/winsys/sw/hgl/hgl_sw_winsys.h @@ -27,9 +27,16 @@ #ifndef _HGL_SOFTWAREWINSYS_H #define _HGL_SOFTWAREWINSYS_H +#ifdef __cplusplus +extern "C" { +#endif + struct sw_winsys; struct sw_winsys* hgl_create_sw_winsys(void); +#ifdef __cplusplus +} +#endif #endif From 9b5da7f06a4052b1298e4684d992a319fb4153ba Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Thu, 14 May 2015 17:29:00 -0500 Subject: [PATCH 140/834] st/hgl: Move st_api creation to st and extern "C" it Reviewed-by: Brian Paul --- src/gallium/state_trackers/hgl/hgl.c | 16 ++++++++++++---- src/gallium/state_trackers/hgl/hgl_context.h | 14 ++++++++++---- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/gallium/state_trackers/hgl/hgl.c b/src/gallium/state_trackers/hgl/hgl.c index 77f7c2256e5..1e804c07e6b 100644 --- a/src/gallium/state_trackers/hgl/hgl.c +++ b/src/gallium/state_trackers/hgl/hgl.c @@ -7,8 +7,7 @@ * Alexander von Gluck IV, kallisti5@unixzen.com */ - -#include "GLView.h" +#include "hgl_context.h" #include @@ -17,8 +16,9 @@ #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_inlines.h" +#include "state_tracker/st_gl_api.h" /* for st_gl_api_create */ -#include "hgl_context.h" +#include "GLView.h" #ifdef DEBUG @@ -93,7 +93,7 @@ hgl_st_framebuffer_validate_textures(struct st_framebuffer_iface *stfbi, for (i = 0; i < ST_ATTACHMENT_COUNT; i++) pipe_resource_reference(&buffer->textures[i], NULL); } - + memset(&templat, 0, sizeof(templat)); templat.target = buffer->target; templat.width0 = width; @@ -258,6 +258,14 @@ hgl_create_st_framebuffer(struct hgl_context* context) } +struct st_api* +hgl_create_st_api() +{ + CALLED(); + return st_gl_api_create(); +} + + struct st_manager * hgl_create_st_manager(struct hgl_context* context) { diff --git a/src/gallium/state_trackers/hgl/hgl_context.h b/src/gallium/state_trackers/hgl/hgl_context.h index 4840d9e2ee4..d2ec7fb49c4 100644 --- a/src/gallium/state_trackers/hgl/hgl_context.h +++ 
b/src/gallium/state_trackers/hgl/hgl_context.h @@ -9,9 +9,6 @@ #define HGL_CONTEXT_H -#ifdef __cplusplus -extern "C" { -#endif #include "state_tracker/st_api.h" #include "state_tracker/st_manager.h" #include "pipe/p_compiler.h" @@ -20,8 +17,10 @@ extern "C" { #include "os/os_thread.h" #include "bitmap_wrapper.h" + + #ifdef __cplusplus -} +extern "C" { #endif @@ -82,6 +81,9 @@ struct hgl_context }; +// hgl state_tracker api +struct st_api* hgl_create_st_api(void); + // hgl state_tracker framebuffer struct hgl_buffer* hgl_create_st_framebuffer(struct hgl_context* context); @@ -94,4 +96,8 @@ struct st_visual* hgl_create_st_visual(ulong options); void hgl_destroy_st_visual(struct st_visual* visual); +#ifdef __cplusplus +} +#endif + #endif /* HGL_CONTEXT_H */ From 7de484871dae1643cbecf346066968f61684f06f Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Thu, 14 May 2015 17:30:35 -0500 Subject: [PATCH 141/834] target/haiku-softpipe: Move api init into st code We also reduce the amount of need-to-know information about st_api to require one less extern "C" in st_manager.h Reviewed-by: Brian Paul --- .../targets/haiku-softpipe/GalliumContext.cpp | 23 ++++++++----------- .../targets/haiku-softpipe/GalliumContext.h | 6 +---- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/gallium/targets/haiku-softpipe/GalliumContext.cpp b/src/gallium/targets/haiku-softpipe/GalliumContext.cpp index b24aef7dd5d..1e3874bdb17 100644 --- a/src/gallium/targets/haiku-softpipe/GalliumContext.cpp +++ b/src/gallium/targets/haiku-softpipe/GalliumContext.cpp @@ -15,14 +15,13 @@ #include "GLView.h" #include "bitmap_wrapper.h" -extern "C" { + #include "glapi/glapi.h" #include "pipe/p_format.h" -#include "state_tracker/st_cb_fbo.h" -#include "state_tracker/st_cb_flush.h" +//#include "state_tracker/st_cb_fbo.h" +//#include "state_tracker/st_cb_flush.h" #include "state_tracker/st_context.h" #include "state_tracker/st_gl_api.h" -#include "state_tracker/st_manager.h" #include 
"state_tracker/sw_winsys.h" #include "sw/hgl/hgl_sw_winsys.h" #include "util/u_atomic.h" @@ -30,7 +29,6 @@ extern "C" { #include "target-helpers/inline_sw_helper.h" #include "target-helpers/inline_debug_helper.h" -} #ifdef DEBUG @@ -127,7 +125,8 @@ GalliumContext::CreateContext(Bitmap *bitmap) context->read = NULL; context->st = NULL; - context->api = st_gl_api_create(); + // Create st_gl_api + context->api = hgl_create_st_api(); if (!context->api) { ERROR("%s: Couldn't obtain Mesa state tracker API!\n", __func__); return -1; @@ -159,12 +158,10 @@ GalliumContext::CreateContext(Bitmap *bitmap) attribs.minor = 0; //attribs.flags |= ST_CONTEXT_FLAG_DEBUG; - struct st_api* api = context->api; - // Create context using state tracker api call enum st_context_error result; - context->st = api->create_context(api, context->manager, &attribs, - &result, context->st); + context->st = context->api->create_context(context->api, context->manager, + &attribs, &result, context->st); if (!context->st) { ERROR("%s: Couldn't create mesa state tracker context!\n", @@ -289,10 +286,8 @@ GalliumContext::SetCurrentContext(Bitmap *bitmap, context_id contextID) return B_ERROR; } - struct st_api* api = context->api; - if (!bitmap) { - api->make_current(context->api, NULL, NULL, NULL); + context->api->make_current(context->api, NULL, NULL, NULL); return B_OK; } @@ -305,7 +300,7 @@ GalliumContext::SetCurrentContext(Bitmap *bitmap, context_id contextID) } // We need to lock and unlock framebuffers before accessing them - api->make_current(context->api, context->st, context->draw->stfbi, + context->api->make_current(context->api, context->st, context->draw->stfbi, context->read->stfbi); //if (context->textures[ST_ATTACHMENT_BACK_LEFT] diff --git a/src/gallium/targets/haiku-softpipe/GalliumContext.h b/src/gallium/targets/haiku-softpipe/GalliumContext.h index b50d52895fc..22076cbb141 100644 --- a/src/gallium/targets/haiku-softpipe/GalliumContext.h +++ 
b/src/gallium/targets/haiku-softpipe/GalliumContext.h @@ -12,14 +12,10 @@ #include #include -extern "C" { -//#include "state_tracker/st_api.h" #include "pipe/p_compiler.h" #include "pipe/p_screen.h" #include "postprocess/filters.h" -#include "os/os_thread.h" #include "hgl_context.h" -} #include "bitmap_wrapper.h" @@ -56,6 +52,6 @@ private: context_id fCurrentContext; pipe_mutex fMutex; }; - + #endif /* GALLIUMCONTEXT_H */ From d43aed9646972c4aca44f47d1b445bd8e67024d7 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 15 May 2015 11:55:46 -0700 Subject: [PATCH 142/834] i965: Fix FS unit tests Commit 3687d75 changed the fs_visitor constructors, but it didn't update all the users. As a result, 'make check' fails. I added the explicit cast to the gl_program* parameter to make it more clear which NULL was which. Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp | 3 ++- src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index 206a76e9242..0e48e824711 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -45,7 +45,8 @@ public: cmod_propagation_fs_visitor(struct brw_context *brw, struct brw_wm_prog_data *prog_data, struct gl_shader_program *shader_prog) - : fs_visitor(brw, NULL, NULL, prog_data, shader_prog, NULL, 8) {} + : fs_visitor(brw, NULL, MESA_SHADER_FRAGMENT, NULL, &prog_data->base, + shader_prog, (struct gl_program *) NULL, 8) {} }; diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index 4c91af3ea8d..8b1fab06607 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ 
-45,7 +45,8 @@ public: saturate_propagation_fs_visitor(struct brw_context *brw, struct brw_wm_prog_data *prog_data, struct gl_shader_program *shader_prog) - : fs_visitor(brw, NULL, NULL, prog_data, shader_prog, NULL, 8) {} + : fs_visitor(brw, NULL, MESA_SHADER_FRAGMENT, NULL, &prog_data->base, + shader_prog, (struct gl_program *) NULL, 8) {} }; From b3059bb7c53d33764b96b76971b3ff98675198c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Fri, 15 May 2015 19:30:38 +0200 Subject: [PATCH 143/834] st/mesa: Flush the bitmap cache in st_BlitFramebuffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With DSA we can no longer rely on this being done in st_validate_state in response to the framebuffer bindings having changed. This fixes the ext_framebuffer_multisample-bitmap piglit test. Signed-off-by: Fredrik Höglund Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_cb_blit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index bbaedd108f6..6d9371852c5 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -36,6 +36,7 @@ #include "st_context.h" #include "st_texture.h" +#include "st_cb_bitmap.h" #include "st_cb_blit.h" #include "st_cb_fbo.h" #include "st_atom.h" @@ -93,6 +94,9 @@ st_BlitFramebuffer(struct gl_context *ctx, st_validate_state(st); + /* Make sure bitmap rendering has landed in the framebuffers */ + st_flush_bitmap_cache(st); + clip.srcX0 = srcX0; clip.srcY0 = srcY0; clip.srcX1 = srcX1; From 0784bb01b5e42db7d7241e9baa793f647ef5be52 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 11:37:42 -0700 Subject: [PATCH 144/834] glapi: Mark a couple functions "ignore" for GLX Without this the next patch will try to put these functions in the dispatch table in indirect_init.c. 
Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- src/mapi/glapi/gen/gl_API.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index d1565989c2e..4c23bd9f663 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -6672,7 +6672,7 @@ - +
    @@ -6738,7 +6738,7 @@ - + From 5aaabd7630ca6fd72a3333687249574e6fcbe663 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 12:24:04 -0700 Subject: [PATCH 145/834] mesa: Remove all vestiges of glFramebufferTextureFaceARB Mesa does not (and probably never will) support GL_ARB_geometry_shader4, so this function will never exist. Having a function that is exec="skip" and offset="assign" is just weird. There are still a couple 'exec="skip" offset="assign"' functions remaining. These remain because we either support GLX protocol for them (glSampleMaskSGIS and glSamplePatternSGIS) or older DRI drivers still need them in the dispatch table (glResizeBuffersMESA). The SGIS functions can be removed later. Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- src/mapi/glapi/gen/ARB_geometry_shader4.xml | 2 +- src/mapi/glapi/tests/check_table.cpp | 1 - src/mesa/main/dlist.c | 30 +-------------------- src/mesa/main/tests/dispatch_sanity.cpp | 9 +++---- 4 files changed, 5 insertions(+), 37 deletions(-) diff --git a/src/mapi/glapi/gen/ARB_geometry_shader4.xml b/src/mapi/glapi/gen/ARB_geometry_shader4.xml index e62047c9bd6..280e7a07599 100644 --- a/src/mapi/glapi/gen/ARB_geometry_shader4.xml +++ b/src/mapi/glapi/gen/ARB_geometry_shader4.xml @@ -45,7 +45,7 @@ - + diff --git a/src/mapi/glapi/tests/check_table.cpp b/src/mapi/glapi/tests/check_table.cpp index 5d759df76d9..09bf4f3585c 100644 --- a/src/mapi/glapi/tests/check_table.cpp +++ b/src/mapi/glapi/tests/check_table.cpp @@ -1137,7 +1137,6 @@ const struct name_offset known_dispatch[] = { { "glDrawElementsInstancedARB", _O(DrawElementsInstancedARB) }, { "glRenderbufferStorageMultisample", _O(RenderbufferStorageMultisample) }, { "glFramebufferTexture", _O(FramebufferTexture) }, - { "glFramebufferTextureFaceARB", _O(FramebufferTextureFaceARB) }, { "glProgramParameteri", _O(ProgramParameteri) }, { "glVertexAttribDivisor", _O(VertexAttribDivisor) }, { "glFlushMappedBufferRange", _O(FlushMappedBufferRange) }, diff 
--git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 431c4b48b79..aafe486fb60 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -7592,28 +7592,6 @@ save_FramebufferTexture(GLenum target, GLenum attachment, } } -static void GLAPIENTRY -save_FramebufferTextureFace(GLenum target, GLenum attachment, - GLuint texture, GLint level, GLenum face) -{ - Node *n; - GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx); - n = alloc_instruction(ctx, OPCODE_FRAMEBUFFER_TEXTURE_FACE, 5); - if (n) { - n[1].e = target; - n[2].e = attachment; - n[3].ui = texture; - n[4].i = level; - n[5].e = face; - } - if (ctx->ExecuteFlag) { - CALL_FramebufferTextureFaceARB(ctx->Exec, (target, attachment, texture, - level, face)); - } -} - - static void GLAPIENTRY save_WaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout) @@ -8873,11 +8851,6 @@ execute_list(struct gl_context *ctx, GLuint list) CALL_FramebufferTexture(ctx->Exec, (n[1].e, n[2].e, n[3].ui, n[4].i)); break; - case OPCODE_FRAMEBUFFER_TEXTURE_FACE: - CALL_FramebufferTextureFaceARB(ctx->Exec, (n[1].e, n[2].e, - n[3].ui, n[4].i, n[5].e)); - break; - /* GL_ARB_sync */ case OPCODE_WAIT_SYNC: { @@ -9644,10 +9617,9 @@ _mesa_initialize_save_table(const struct gl_context *ctx) SET_BlendEquationiARB(table, save_BlendEquationi); SET_BlendEquationSeparateiARB(table, save_BlendEquationSeparatei); - /* GL_ARB_geometry_shader4 */ + /* OpenGL 3.2 */ SET_ProgramParameteri(table, save_ProgramParameteri); SET_FramebufferTexture(table, save_FramebufferTexture); - SET_FramebufferTextureFaceARB(table, save_FramebufferTextureFace); /* GL_NV_conditional_render */ SET_BeginConditionalRender(table, save_BeginConditionalRender); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 77dc1401d19..d38b68d0c9a 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -535,12 +535,9 @@ const struct function 
gl_core_functions_possible[] = { { "glGetInteger64i_v", 32, -1 }, { "glGetBufferParameteri64v", 32, -1 }, { "glFramebufferTexture", 32, -1 }, - - /* GL_ARB_geometry_shader4 */ - { "glProgramParameteriARB", 32, -1 }, - { "glFramebufferTextureARB", 32, -1 }, - { "glFramebufferTextureLayerARB", 32, -1 }, - { "glFramebufferTextureFaceARB", 32, -1 }, + { "glProgramParameteri", 32, -1 }, + { "glFramebufferTexture", 32, -1 }, + { "glFramebufferTextureLayer", 32, -1 }, /* GL 3.3 */ { "glVertexAttribDivisor", 33, -1 }, From d649fcf727bffa11a5426ebcf38f51f478664b17 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 8 May 2015 18:50:11 -0700 Subject: [PATCH 146/834] glapi: Store static dispatch offsets in a separate table Since the set of functions with static offsets will never change, there is no reason to store it in the XML. It's just one of those fields that confuses people adding new functions. This is split out from the rest of the series so that in-code assertions can be used to verify that the data in the Python code matches the XML. 
Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov Reviewed-by: Dylan Baker --- src/mapi/glapi/gen/Makefile.am | 2 + src/mapi/glapi/gen/gl_XML.py | 8 + src/mapi/glapi/gen/static_data.py | 436 ++++++++++++++++++++++++++++++ 3 files changed, 446 insertions(+) create mode 100644 src/mapi/glapi/gen/static_data.py diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am index c8d41746887..4d23f825c92 100644 --- a/src/mapi/glapi/gen/Makefile.am +++ b/src/mapi/glapi/gen/Makefile.am @@ -75,6 +75,7 @@ EXTRA_DIST= \ glX_proto_size.py \ glX_server_table.py \ remap_helper.py \ + static_data.py \ SConscript \ gl_API.dtd @@ -197,6 +198,7 @@ COMMON = $(API_XML) \ gl_XML.py \ glX_XML.py \ license.py \ + static_data.py \ typeexpr.py COMMON_GLX = $(COMMON) glX_API.xml glX_XML.py glX_proto_common.py diff --git a/src/mapi/glapi/gen/gl_XML.py b/src/mapi/glapi/gen/gl_XML.py index 1a2bc2b9112..0695f845e4f 100644 --- a/src/mapi/glapi/gen/gl_XML.py +++ b/src/mapi/glapi/gen/gl_XML.py @@ -30,6 +30,7 @@ import xml.etree.ElementTree as ET import re, sys, string import os.path import typeexpr +import static_data def parse_GL_API( file_name, factory = None ): @@ -694,6 +695,13 @@ class gl_function( gl_item ): if offset == "assign": self.assign_offset = 1 + if self.offset == -1: + assert name not in static_data.offsets + else: + assert static_data.offsets[name] == self.offset + else: + assert name not in static_data.offsets + if not self.name: self.name = true_name diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py new file mode 100644 index 00000000000..2ce093c2af6 --- /dev/null +++ b/src/mapi/glapi/gen/static_data.py @@ -0,0 +1,436 @@ +#!/usr/bin/env python + +# Copyright (C) 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights 
to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +"""Table of functions that have ABI-mandated offsets in the dispatch table. + +This list will never change.""" +offsets = { + "NewList": 0, + "EndList": 1, + "CallList": 2, + "CallLists": 3, + "DeleteLists": 4, + "GenLists": 5, + "ListBase": 6, + "Begin": 7, + "Bitmap": 8, + "Color3b": 9, + "Color3bv": 10, + "Color3d": 11, + "Color3dv": 12, + "Color3f": 13, + "Color3fv": 14, + "Color3i": 15, + "Color3iv": 16, + "Color3s": 17, + "Color3sv": 18, + "Color3ub": 19, + "Color3ubv": 20, + "Color3ui": 21, + "Color3uiv": 22, + "Color3us": 23, + "Color3usv": 24, + "Color4b": 25, + "Color4bv": 26, + "Color4d": 27, + "Color4dv": 28, + "Color4f": 29, + "Color4fv": 30, + "Color4i": 31, + "Color4iv": 32, + "Color4s": 33, + "Color4sv": 34, + "Color4ub": 35, + "Color4ubv": 36, + "Color4ui": 37, + "Color4uiv": 38, + "Color4us": 39, + "Color4usv": 40, + "EdgeFlag": 41, + "EdgeFlagv": 42, + "End": 43, + "Indexd": 44, + "Indexdv": 45, + "Indexf": 46, + "Indexfv": 47, + "Indexi": 48, + "Indexiv": 49, + "Indexs": 50, + "Indexsv": 51, + "Normal3b": 52, + "Normal3bv": 53, + "Normal3d": 54, + "Normal3dv": 55, + "Normal3f": 56, + "Normal3fv": 
57, + "Normal3i": 58, + "Normal3iv": 59, + "Normal3s": 60, + "Normal3sv": 61, + "RasterPos2d": 62, + "RasterPos2dv": 63, + "RasterPos2f": 64, + "RasterPos2fv": 65, + "RasterPos2i": 66, + "RasterPos2iv": 67, + "RasterPos2s": 68, + "RasterPos2sv": 69, + "RasterPos3d": 70, + "RasterPos3dv": 71, + "RasterPos3f": 72, + "RasterPos3fv": 73, + "RasterPos3i": 74, + "RasterPos3iv": 75, + "RasterPos3s": 76, + "RasterPos3sv": 77, + "RasterPos4d": 78, + "RasterPos4dv": 79, + "RasterPos4f": 80, + "RasterPos4fv": 81, + "RasterPos4i": 82, + "RasterPos4iv": 83, + "RasterPos4s": 84, + "RasterPos4sv": 85, + "Rectd": 86, + "Rectdv": 87, + "Rectf": 88, + "Rectfv": 89, + "Recti": 90, + "Rectiv": 91, + "Rects": 92, + "Rectsv": 93, + "TexCoord1d": 94, + "TexCoord1dv": 95, + "TexCoord1f": 96, + "TexCoord1fv": 97, + "TexCoord1i": 98, + "TexCoord1iv": 99, + "TexCoord1s": 100, + "TexCoord1sv": 101, + "TexCoord2d": 102, + "TexCoord2dv": 103, + "TexCoord2f": 104, + "TexCoord2fv": 105, + "TexCoord2i": 106, + "TexCoord2iv": 107, + "TexCoord2s": 108, + "TexCoord2sv": 109, + "TexCoord3d": 110, + "TexCoord3dv": 111, + "TexCoord3f": 112, + "TexCoord3fv": 113, + "TexCoord3i": 114, + "TexCoord3iv": 115, + "TexCoord3s": 116, + "TexCoord3sv": 117, + "TexCoord4d": 118, + "TexCoord4dv": 119, + "TexCoord4f": 120, + "TexCoord4fv": 121, + "TexCoord4i": 122, + "TexCoord4iv": 123, + "TexCoord4s": 124, + "TexCoord4sv": 125, + "Vertex2d": 126, + "Vertex2dv": 127, + "Vertex2f": 128, + "Vertex2fv": 129, + "Vertex2i": 130, + "Vertex2iv": 131, + "Vertex2s": 132, + "Vertex2sv": 133, + "Vertex3d": 134, + "Vertex3dv": 135, + "Vertex3f": 136, + "Vertex3fv": 137, + "Vertex3i": 138, + "Vertex3iv": 139, + "Vertex3s": 140, + "Vertex3sv": 141, + "Vertex4d": 142, + "Vertex4dv": 143, + "Vertex4f": 144, + "Vertex4fv": 145, + "Vertex4i": 146, + "Vertex4iv": 147, + "Vertex4s": 148, + "Vertex4sv": 149, + "ClipPlane": 150, + "ColorMaterial": 151, + "CullFace": 152, + "Fogf": 153, + "Fogfv": 154, + "Fogi": 155, + "Fogiv": 156, + 
"FrontFace": 157, + "Hint": 158, + "Lightf": 159, + "Lightfv": 160, + "Lighti": 161, + "Lightiv": 162, + "LightModelf": 163, + "LightModelfv": 164, + "LightModeli": 165, + "LightModeliv": 166, + "LineStipple": 167, + "LineWidth": 168, + "Materialf": 169, + "Materialfv": 170, + "Materiali": 171, + "Materialiv": 172, + "PointSize": 173, + "PolygonMode": 174, + "PolygonStipple": 175, + "Scissor": 176, + "ShadeModel": 177, + "TexParameterf": 178, + "TexParameterfv": 179, + "TexParameteri": 180, + "TexParameteriv": 181, + "TexImage1D": 182, + "TexImage2D": 183, + "TexEnvf": 184, + "TexEnvfv": 185, + "TexEnvi": 186, + "TexEnviv": 187, + "TexGend": 188, + "TexGendv": 189, + "TexGenf": 190, + "TexGenfv": 191, + "TexGeni": 192, + "TexGeniv": 193, + "FeedbackBuffer": 194, + "SelectBuffer": 195, + "RenderMode": 196, + "InitNames": 197, + "LoadName": 198, + "PassThrough": 199, + "PopName": 200, + "PushName": 201, + "DrawBuffer": 202, + "Clear": 203, + "ClearAccum": 204, + "ClearIndex": 205, + "ClearColor": 206, + "ClearStencil": 207, + "ClearDepth": 208, + "StencilMask": 209, + "ColorMask": 210, + "DepthMask": 211, + "IndexMask": 212, + "Accum": 213, + "Disable": 214, + "Enable": 215, + "Finish": 216, + "Flush": 217, + "PopAttrib": 218, + "PushAttrib": 219, + "Map1d": 220, + "Map1f": 221, + "Map2d": 222, + "Map2f": 223, + "MapGrid1d": 224, + "MapGrid1f": 225, + "MapGrid2d": 226, + "MapGrid2f": 227, + "EvalCoord1d": 228, + "EvalCoord1dv": 229, + "EvalCoord1f": 230, + "EvalCoord1fv": 231, + "EvalCoord2d": 232, + "EvalCoord2dv": 233, + "EvalCoord2f": 234, + "EvalCoord2fv": 235, + "EvalMesh1": 236, + "EvalPoint1": 237, + "EvalMesh2": 238, + "EvalPoint2": 239, + "AlphaFunc": 240, + "BlendFunc": 241, + "LogicOp": 242, + "StencilFunc": 243, + "StencilOp": 244, + "DepthFunc": 245, + "PixelZoom": 246, + "PixelTransferf": 247, + "PixelTransferi": 248, + "PixelStoref": 249, + "PixelStorei": 250, + "PixelMapfv": 251, + "PixelMapuiv": 252, + "PixelMapusv": 253, + "ReadBuffer": 254, + 
"CopyPixels": 255, + "ReadPixels": 256, + "DrawPixels": 257, + "GetBooleanv": 258, + "GetClipPlane": 259, + "GetDoublev": 260, + "GetError": 261, + "GetFloatv": 262, + "GetIntegerv": 263, + "GetLightfv": 264, + "GetLightiv": 265, + "GetMapdv": 266, + "GetMapfv": 267, + "GetMapiv": 268, + "GetMaterialfv": 269, + "GetMaterialiv": 270, + "GetPixelMapfv": 271, + "GetPixelMapuiv": 272, + "GetPixelMapusv": 273, + "GetPolygonStipple": 274, + "GetString": 275, + "GetTexEnvfv": 276, + "GetTexEnviv": 277, + "GetTexGendv": 278, + "GetTexGenfv": 279, + "GetTexGeniv": 280, + "GetTexImage": 281, + "GetTexParameterfv": 282, + "GetTexParameteriv": 283, + "GetTexLevelParameterfv": 284, + "GetTexLevelParameteriv": 285, + "IsEnabled": 286, + "IsList": 287, + "DepthRange": 288, + "Frustum": 289, + "LoadIdentity": 290, + "LoadMatrixf": 291, + "LoadMatrixd": 292, + "MatrixMode": 293, + "MultMatrixf": 294, + "MultMatrixd": 295, + "Ortho": 296, + "PopMatrix": 297, + "PushMatrix": 298, + "Rotated": 299, + "Rotatef": 300, + "Scaled": 301, + "Scalef": 302, + "Translated": 303, + "Translatef": 304, + "Viewport": 305, + "ArrayElement": 306, + "ColorPointer": 308, + "DisableClientState": 309, + "DrawArrays": 310, + "DrawElements": 311, + "EdgeFlagPointer": 312, + "EnableClientState": 313, + "GetPointerv": 329, + "IndexPointer": 314, + "InterleavedArrays": 317, + "NormalPointer": 318, + "TexCoordPointer": 320, + "VertexPointer": 321, + "PolygonOffset": 319, + "CopyTexImage1D": 323, + "CopyTexImage2D": 324, + "CopyTexSubImage1D": 325, + "CopyTexSubImage2D": 326, + "TexSubImage1D": 332, + "TexSubImage2D": 333, + "AreTexturesResident": 322, + "BindTexture": 307, + "DeleteTextures": 327, + "GenTextures": 328, + "IsTexture": 330, + "PrioritizeTextures": 331, + "Indexub": 315, + "Indexubv": 316, + "PopClientAttrib": 334, + "PushClientAttrib": 335, + "BlendColor": 336, + "BlendEquation": 337, + "DrawRangeElements": 338, + "ColorTable": 339, + "ColorTableParameterfv": 340, + "ColorTableParameteriv": 
341, + "CopyColorTable": 342, + "GetColorTable": 343, + "GetColorTableParameterfv": 344, + "GetColorTableParameteriv": 345, + "ColorSubTable": 346, + "CopyColorSubTable": 347, + "ConvolutionFilter1D": 348, + "ConvolutionFilter2D": 349, + "ConvolutionParameterf": 350, + "ConvolutionParameterfv": 351, + "ConvolutionParameteri": 352, + "ConvolutionParameteriv": 353, + "CopyConvolutionFilter1D": 354, + "CopyConvolutionFilter2D": 355, + "GetConvolutionFilter": 356, + "GetConvolutionParameterfv": 357, + "GetConvolutionParameteriv": 358, + "GetSeparableFilter": 359, + "SeparableFilter2D": 360, + "GetHistogram": 361, + "GetHistogramParameterfv": 362, + "GetHistogramParameteriv": 363, + "GetMinmax": 364, + "GetMinmaxParameterfv": 365, + "GetMinmaxParameteriv": 366, + "Histogram": 367, + "Minmax": 368, + "ResetHistogram": 369, + "ResetMinmax": 370, + "TexImage3D": 371, + "TexSubImage3D": 372, + "CopyTexSubImage3D": 373, + "ActiveTexture": 374, + "ClientActiveTexture": 375, + "MultiTexCoord1d": 376, + "MultiTexCoord1dv": 377, + "MultiTexCoord1fARB": 378, + "MultiTexCoord1fvARB": 379, + "MultiTexCoord1i": 380, + "MultiTexCoord1iv": 381, + "MultiTexCoord1s": 382, + "MultiTexCoord1sv": 383, + "MultiTexCoord2d": 384, + "MultiTexCoord2dv": 385, + "MultiTexCoord2fARB": 386, + "MultiTexCoord2fvARB": 387, + "MultiTexCoord2i": 388, + "MultiTexCoord2iv": 389, + "MultiTexCoord2s": 390, + "MultiTexCoord2sv": 391, + "MultiTexCoord3d": 392, + "MultiTexCoord3dv": 393, + "MultiTexCoord3fARB": 394, + "MultiTexCoord3fvARB": 395, + "MultiTexCoord3i": 396, + "MultiTexCoord3iv": 397, + "MultiTexCoord3s": 398, + "MultiTexCoord3sv": 399, + "MultiTexCoord4d": 400, + "MultiTexCoord4dv": 401, + "MultiTexCoord4fARB": 402, + "MultiTexCoord4fvARB": 403, + "MultiTexCoord4i": 404, + "MultiTexCoord4iv": 405, + "MultiTexCoord4s": 406, + "MultiTexCoord4sv": 407 +} From d9be1db4b69a04f58a951351051ef9798d55da98 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 15:11:09 -0700 Subject: [PATCH 
147/834] glapi: Store list of functions with static dispatch in a separate table The set of functions with static dispatch is (supposed to be) defined by the Linux OpenGL ABI. We export quite a few more functions than that for historical reasons. However, this list should never grow. This table is used instead of the static_dispatch tag in the XML to generate the static dispatch functions. I used nm libGL.so | grep ' T gl[^X]' | sed 's/.* T //' before and after the change. diff showed no differences. Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov Reviewed-by: Dylan Baker --- src/mapi/glapi/gen/gl_XML.py | 2 +- src/mapi/glapi/gen/static_data.py | 1519 +++++++++++++++++++++++++++++ 2 files changed, 1520 insertions(+), 1 deletion(-) diff --git a/src/mapi/glapi/gen/gl_XML.py b/src/mapi/glapi/gen/gl_XML.py index 0695f845e4f..89b09f250dc 100644 --- a/src/mapi/glapi/gen/gl_XML.py +++ b/src/mapi/glapi/gen/gl_XML.py @@ -650,7 +650,7 @@ class gl_function( gl_item ): name = element.get( "name" ) alias = element.get( "alias" ) - if is_attr_true(element, "static_dispatch", "true"): + if name in static_data.functions: self.static_entry_points.append(name) self.entry_points.append( name ) diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py index 2ce093c2af6..a1983ffcf35 100644 --- a/src/mapi/glapi/gen/static_data.py +++ b/src/mapi/glapi/gen/static_data.py @@ -434,3 +434,1522 @@ offsets = { "MultiTexCoord4s": 406, "MultiTexCoord4sv": 407 } + +functions = [ + "Accum", + "ActiveProgramEXT", + "ActiveTexture", + "ActiveTextureARB", + "AlphaFragmentOp1ATI", + "AlphaFragmentOp2ATI", + "AlphaFragmentOp3ATI", + "AlphaFunc", + "AlphaFuncx", + "AreProgramsResidentNV", + "AreTexturesResident", + "AreTexturesResidentEXT", + "ArrayElement", + "ArrayElementEXT", + "AttachObjectARB", + "AttachShader", + "Begin", + "BeginConditionalRender", + "BeginConditionalRenderNV", + "BeginFragmentShaderATI", + "BeginPerfMonitorAMD", + "BeginQuery", + "BeginQueryARB", + 
"BeginQueryIndexed", + "BeginTransformFeedback", + "BeginTransformFeedbackEXT", + "BindAttribLocation", + "BindAttribLocationARB", + "BindBuffer", + "BindBufferARB", + "BindBufferBase", + "BindBufferBaseEXT", + "BindBufferOffsetEXT", + "BindBufferRange", + "BindBufferRangeEXT", + "BindBuffersBase", + "BindBuffersRange", + "BindFragDataLocation", + "BindFragDataLocationEXT", + "BindFragDataLocationIndexed", + "BindFragmentShaderATI", + "BindFramebuffer", + "BindFramebufferEXT", + "BindImageTexture", + "BindImageTextures", + "BindProgramARB", + "BindProgramNV", + "BindRenderbuffer", + "BindRenderbufferEXT", + "BindSampler", + "BindSamplers", + "BindTexture", + "BindTextureEXT", + "BindTextures", + "BindTextureUnit", + "BindTransformFeedback", + "BindVertexArray", + "BindVertexBuffer", + "BindVertexBuffers", + "Bitmap", + "BlendColor", + "BlendColorEXT", + "BlendEquation", + "BlendEquationEXT", + "BlendEquationiARB", + "BlendEquationIndexedAMD", + "BlendEquationSeparate", + "BlendEquationSeparateiARB", + "BlendEquationSeparateIndexedAMD", + "BlendFunc", + "BlendFunciARB", + "BlendFuncIndexedAMD", + "BlendFuncSeparate", + "BlendFuncSeparateEXT", + "BlendFuncSeparateiARB", + "BlendFuncSeparateIndexedAMD", + "BlitFramebuffer", + "BufferData", + "BufferDataARB", + "BufferStorage", + "BufferSubData", + "BufferSubDataARB", + "CallList", + "CallLists", + "CheckFramebufferStatus", + "CheckFramebufferStatusEXT", + "ClampColor", + "ClampColorARB", + "Clear", + "ClearAccum", + "ClearBufferData", + "ClearBufferfi", + "ClearBufferfv", + "ClearBufferiv", + "ClearBufferSubData", + "ClearBufferuiv", + "ClearColor", + "ClearColorIiEXT", + "ClearColorIuiEXT", + "ClearColorx", + "ClearDepth", + "ClearDepthf", + "ClearDepthx", + "ClearIndex", + "ClearNamedBufferData", + "ClearNamedBufferSubData", + "ClearStencil", + "ClearTexImage", + "ClearTexSubImage", + "ClientActiveTexture", + "ClientActiveTextureARB", + "ClientWaitSync", + "ClipControl", + "ClipPlane", + "ClipPlanef", + 
"ClipPlanex", + "Color3b", + "Color3bv", + "Color3d", + "Color3dv", + "Color3f", + "Color3fv", + "Color3i", + "Color3iv", + "Color3s", + "Color3sv", + "Color3ub", + "Color3ubv", + "Color3ui", + "Color3uiv", + "Color3us", + "Color3usv", + "Color4b", + "Color4bv", + "Color4d", + "Color4dv", + "Color4f", + "Color4fv", + "Color4i", + "Color4iv", + "Color4s", + "Color4sv", + "Color4ub", + "Color4ubv", + "Color4ui", + "Color4uiv", + "Color4us", + "Color4usv", + "Color4x", + "ColorFragmentOp1ATI", + "ColorFragmentOp2ATI", + "ColorFragmentOp3ATI", + "ColorMask", + "ColorMaski", + "ColorMaskIndexedEXT", + "ColorMaterial", + "ColorP3ui", + "ColorP3uiv", + "ColorP4ui", + "ColorP4uiv", + "ColorPointer", + "ColorPointerEXT", + "ColorSubTable", + "ColorTable", + "ColorTableEXT", + "ColorTableParameterfv", + "ColorTableParameteriv", + "CompileShader", + "CompileShaderARB", + "CompressedTexImage1D", + "CompressedTexImage1DARB", + "CompressedTexImage2D", + "CompressedTexImage2DARB", + "CompressedTexImage3D", + "CompressedTexImage3DARB", + "CompressedTexSubImage1D", + "CompressedTexSubImage1DARB", + "CompressedTexSubImage2D", + "CompressedTexSubImage2DARB", + "CompressedTexSubImage3D", + "CompressedTexSubImage3DARB", + "CompressedTextureSubImage1D", + "CompressedTextureSubImage2D", + "CompressedTextureSubImage3D", + "ConvolutionFilter1D", + "ConvolutionFilter2D", + "ConvolutionParameterf", + "ConvolutionParameterfv", + "ConvolutionParameteri", + "ConvolutionParameteriv", + "CopyBufferSubData", + "CopyColorSubTable", + "CopyColorTable", + "CopyConvolutionFilter1D", + "CopyConvolutionFilter2D", + "CopyImageSubData", + "CopyNamedBufferSubData", + "CopyPixels", + "CopyTexImage1D", + "CopyTexImage1DEXT", + "CopyTexImage2D", + "CopyTexImage2DEXT", + "CopyTexSubImage1D", + "CopyTexSubImage1DEXT", + "CopyTexSubImage2D", + "CopyTexSubImage2DEXT", + "CopyTexSubImage3D", + "CopyTexSubImage3DEXT", + "CopyTextureSubImage1D", + "CopyTextureSubImage2D", + "CopyTextureSubImage3D", + 
"CreateBuffers", + "CreateProgram", + "CreateProgramObjectARB", + "CreateProgramPipelines", + "CreateQueries", + "CreateRenderbuffers", + "CreateSamplers", + "CreateShader", + "CreateShaderObjectARB", + "CreateShaderProgramEXT", + "CreateTextures", + "CreateTransformFeedbacks", + "CreateVertexArrays", + "CullFace", + "DebugMessageCallback", + "DebugMessageCallbackARB", + "DebugMessageControl", + "DebugMessageControlARB", + "DebugMessageInsert", + "DebugMessageInsertARB", + "DeleteBuffers", + "DeleteBuffersARB", + "DeleteFragmentShaderATI", + "DeleteFramebuffers", + "DeleteFramebuffersEXT", + "DeleteLists", + "DeleteObjectARB", + "DeletePerfMonitorsAMD", + "DeleteProgram", + "DeleteProgramsARB", + "DeleteProgramsNV", + "DeleteQueries", + "DeleteQueriesARB", + "DeleteRenderbuffers", + "DeleteRenderbuffersEXT", + "DeleteSamplers", + "DeleteShader", + "DeleteSync", + "DeleteTextures", + "DeleteTexturesEXT", + "DeleteTransformFeedbacks", + "DeleteVertexArrays", + "DepthFunc", + "DepthMask", + "DepthRange", + "DepthRangeArrayv", + "DepthRangef", + "DepthRangeIndexed", + "DepthRangex", + "DetachObjectARB", + "DetachShader", + "Disable", + "DisableClientState", + "Disablei", + "DisableIndexedEXT", + "DisableVertexArrayAttrib", + "DisableVertexAttribArray", + "DisableVertexAttribArrayARB", + "DispatchCompute", + "DispatchComputeIndirect", + "DrawArrays", + "DrawArraysEXT", + "DrawArraysIndirect", + "DrawArraysInstanced", + "DrawArraysInstancedARB", + "DrawArraysInstancedBaseInstance", + "DrawArraysInstancedEXT", + "DrawBuffer", + "DrawBuffers", + "DrawBuffersARB", + "DrawBuffersATI", + "DrawElements", + "DrawElementsBaseVertex", + "DrawElementsIndirect", + "DrawElementsInstanced", + "DrawElementsInstancedARB", + "DrawElementsInstancedBaseInstance", + "DrawElementsInstancedBaseVertex", + "DrawElementsInstancedBaseVertexBaseInstance", + "DrawElementsInstancedEXT", + "DrawPixels", + "DrawRangeElements", + "DrawRangeElementsBaseVertex", + "DrawRangeElementsEXT", + 
"DrawTransformFeedback", + "DrawTransformFeedbackInstanced", + "DrawTransformFeedbackStream", + "DrawTransformFeedbackStreamInstanced", + "EdgeFlag", + "EdgeFlagPointer", + "EdgeFlagPointerEXT", + "EdgeFlagv", + "EGLImageTargetRenderbufferStorageOES", + "EGLImageTargetTexture2DOES", + "Enable", + "EnableClientState", + "Enablei", + "EnableIndexedEXT", + "EnableVertexArrayAttrib", + "EnableVertexAttribArray", + "EnableVertexAttribArrayARB", + "End", + "EndConditionalRender", + "EndConditionalRenderNV", + "EndFragmentShaderATI", + "EndList", + "EndPerfMonitorAMD", + "EndQuery", + "EndQueryARB", + "EndQueryIndexed", + "EndTransformFeedback", + "EndTransformFeedbackEXT", + "EvalCoord1d", + "EvalCoord1dv", + "EvalCoord1f", + "EvalCoord1fv", + "EvalCoord2d", + "EvalCoord2dv", + "EvalCoord2f", + "EvalCoord2fv", + "EvalMesh1", + "EvalMesh2", + "EvalPoint1", + "EvalPoint2", + "ExecuteProgramNV", + "FeedbackBuffer", + "FenceSync", + "Finish", + "Flush", + "FlushMappedBufferRange", + "FlushMappedNamedBufferRange", + "FogCoordd", + "FogCoorddEXT", + "FogCoorddv", + "FogCoorddvEXT", + "FogCoordf", + "FogCoordfEXT", + "FogCoordfv", + "FogCoordfvEXT", + "FogCoordPointer", + "FogCoordPointerEXT", + "Fogf", + "Fogfv", + "Fogi", + "Fogiv", + "Fogx", + "Fogxv", + "FramebufferRenderbuffer", + "FramebufferRenderbufferEXT", + "FramebufferTexture", + "FramebufferTexture1D", + "FramebufferTexture1DEXT", + "FramebufferTexture2D", + "FramebufferTexture2DEXT", + "FramebufferTexture3D", + "FramebufferTexture3DEXT", + "FramebufferTextureARB", + "FramebufferTextureLayer", + "FramebufferTextureLayerARB", + "FramebufferTextureLayerEXT", + "FrontFace", + "Frustum", + "Frustumf", + "Frustumx", + "GenBuffers", + "GenBuffersARB", + "GenerateMipmap", + "GenerateMipmapEXT", + "GenerateTextureMipmap", + "GenFragmentShadersATI", + "GenFramebuffers", + "GenFramebuffersEXT", + "GenLists", + "GenPerfMonitorsAMD", + "GenProgramsARB", + "GenProgramsNV", + "GenQueries", + "GenQueriesARB", + "GenRenderbuffers", 
+ "GenRenderbuffersEXT", + "GenSamplers", + "GenTextures", + "GenTexturesEXT", + "GenTransformFeedbacks", + "GenVertexArrays", + "GetActiveAtomicCounterBufferiv", + "GetActiveAttrib", + "GetActiveAttribARB", + "GetActiveUniform", + "GetActiveUniformARB", + "GetActiveUniformBlockiv", + "GetActiveUniformBlockName", + "GetActiveUniformName", + "GetActiveUniformsiv", + "GetAttachedObjectsARB", + "GetAttachedShaders", + "GetAttribLocation", + "GetAttribLocationARB", + "GetBooleanIndexedvEXT", + "GetBooleani_v", + "GetBooleanv", + "GetBufferParameteri64v", + "GetBufferParameteriv", + "GetBufferParameterivARB", + "GetBufferPointerv", + "GetBufferPointervARB", + "GetBufferSubData", + "GetBufferSubDataARB", + "GetClipPlane", + "GetClipPlanef", + "GetClipPlanex", + "GetColorTable", + "GetColorTableEXT", + "GetColorTableParameterfv", + "GetColorTableParameterfvEXT", + "GetColorTableParameteriv", + "GetColorTableParameterivEXT", + "GetCompressedTexImage", + "GetCompressedTexImageARB", + "GetCompressedTextureImage", + "GetConvolutionFilter", + "GetConvolutionParameterfv", + "GetConvolutionParameteriv", + "GetDebugMessageLog", + "GetDebugMessageLogARB", + "GetDoublei_v", + "GetDoublev", + "GetError", + "GetFixedv", + "GetFloati_v", + "GetFloatv", + "GetFragDataIndex", + "GetFragDataLocation", + "GetFragDataLocationEXT", + "GetFramebufferAttachmentParameteriv", + "GetFramebufferAttachmentParameterivEXT", + "GetGraphicsResetStatusARB", + "GetHandleARB", + "GetHistogram", + "GetHistogramParameterfv", + "GetHistogramParameteriv", + "GetInfoLogARB", + "GetInteger64i_v", + "GetInteger64v", + "GetIntegerIndexedvEXT", + "GetIntegeri_v", + "GetIntegerv", + "GetLightfv", + "GetLightiv", + "GetLightxv", + "GetMapdv", + "GetMapfv", + "GetMapiv", + "GetMaterialfv", + "GetMaterialiv", + "GetMaterialxv", + "GetMinmax", + "GetMinmaxParameterfv", + "GetMinmaxParameteriv", + "GetMultisamplefv", + "GetNamedBufferParameteri64v", + "GetNamedBufferParameteriv", + "GetNamedBufferPointerv", + 
"GetNamedBufferSubData", + "GetNamedRenderbufferParameteriv", + "GetnColorTableARB", + "GetnCompressedTexImageARB", + "GetnConvolutionFilterARB", + "GetnHistogramARB", + "GetnMapdvARB", + "GetnMapfvARB", + "GetnMapivARB", + "GetnMinmaxARB", + "GetnPixelMapfvARB", + "GetnPixelMapuivARB", + "GetnPixelMapusvARB", + "GetnPolygonStippleARB", + "GetnSeparableFilterARB", + "GetnTexImageARB", + "GetnUniformdvARB", + "GetnUniformfvARB", + "GetnUniformivARB", + "GetnUniformuivARB", + "GetObjectLabel", + "GetObjectParameterfvARB", + "GetObjectParameterivAPPLE", + "GetObjectParameterivARB", + "GetObjectPtrLabel", + "GetPerfMonitorCounterDataAMD", + "GetPerfMonitorCounterInfoAMD", + "GetPerfMonitorCountersAMD", + "GetPerfMonitorCounterStringAMD", + "GetPerfMonitorGroupsAMD", + "GetPerfMonitorGroupStringAMD", + "GetPixelMapfv", + "GetPixelMapuiv", + "GetPixelMapusv", + "GetPointerv", + "GetPointervEXT", + "GetPolygonStipple", + "GetProgramBinary", + "GetProgramEnvParameterdvARB", + "GetProgramEnvParameterfvARB", + "GetProgramInfoLog", + "GetProgramInterfaceiv", + "GetProgramiv", + "GetProgramivARB", + "GetProgramivNV", + "GetProgramLocalParameterdvARB", + "GetProgramLocalParameterfvARB", + "GetProgramNamedParameterdvNV", + "GetProgramNamedParameterfvNV", + "GetProgramParameterdvNV", + "GetProgramParameterfvNV", + "GetProgramResourceIndex", + "GetProgramResourceiv", + "GetProgramResourceLocation", + "GetProgramResourceLocationIndex", + "GetProgramResourceName", + "GetProgramStringARB", + "GetProgramStringNV", + "GetQueryBufferObjecti64v", + "GetQueryBufferObjectiv", + "GetQueryBufferObjectui64v", + "GetQueryBufferObjectuiv", + "GetQueryIndexediv", + "GetQueryiv", + "GetQueryivARB", + "GetQueryObjectiv", + "GetQueryObjectivARB", + "GetQueryObjectuiv", + "GetQueryObjectuivARB", + "GetRenderbufferParameteriv", + "GetRenderbufferParameterivEXT", + "GetSamplerParameterfv", + "GetSamplerParameterIiv", + "GetSamplerParameterIuiv", + "GetSamplerParameteriv", + "GetSeparableFilter", + 
"GetShaderInfoLog", + "GetShaderiv", + "GetShaderPrecisionFormat", + "GetShaderSource", + "GetShaderSourceARB", + "GetString", + "GetStringi", + "GetSynciv", + "GetTexBumpParameterfvATI", + "GetTexBumpParameterivATI", + "GetTexEnvfv", + "GetTexEnviv", + "GetTexEnvxv", + "GetTexGendv", + "GetTexGenfv", + "GetTexGeniv", + "GetTexImage", + "GetTexLevelParameterfv", + "GetTexLevelParameteriv", + "GetTexParameterfv", + "GetTexParameterIiv", + "GetTexParameterIivEXT", + "GetTexParameterIuiv", + "GetTexParameterIuivEXT", + "GetTexParameteriv", + "GetTexParameterxv", + "GetTextureImage", + "GetTextureLevelParameterfv", + "GetTextureLevelParameteriv", + "GetTextureParameterfv", + "GetTextureParameterIiv", + "GetTextureParameterIuiv", + "GetTextureParameteriv", + "GetTrackMatrixivNV", + "GetTransformFeedbacki64_v", + "GetTransformFeedbackiv", + "GetTransformFeedbacki_v", + "GetTransformFeedbackVarying", + "GetTransformFeedbackVaryingEXT", + "GetUniformBlockIndex", + "GetUniformdv", + "GetUniformfv", + "GetUniformfvARB", + "GetUniformIndices", + "GetUniformiv", + "GetUniformivARB", + "GetUniformLocation", + "GetUniformLocationARB", + "GetUniformuiv", + "GetUniformuivEXT", + "GetVertexArrayIndexed64iv", + "GetVertexArrayIndexediv", + "GetVertexArrayiv", + "GetVertexAttribdv", + "GetVertexAttribdvARB", + "GetVertexAttribdvNV", + "GetVertexAttribfv", + "GetVertexAttribfvARB", + "GetVertexAttribfvNV", + "GetVertexAttribIiv", + "GetVertexAttribIivEXT", + "GetVertexAttribIuiv", + "GetVertexAttribIuivEXT", + "GetVertexAttribiv", + "GetVertexAttribivARB", + "GetVertexAttribivNV", + "GetVertexAttribLdv", + "GetVertexAttribPointerv", + "GetVertexAttribPointervARB", + "GetVertexAttribPointervNV", + "Hint", + "Histogram", + "Indexd", + "Indexdv", + "Indexf", + "Indexfv", + "Indexi", + "Indexiv", + "IndexMask", + "IndexPointer", + "IndexPointerEXT", + "Indexs", + "Indexsv", + "Indexub", + "Indexubv", + "InitNames", + "InterleavedArrays", + "InvalidateBufferData", + 
"InvalidateBufferSubData", + "InvalidateFramebuffer", + "InvalidateSubFramebuffer", + "InvalidateTexImage", + "InvalidateTexSubImage", + "IsBuffer", + "IsBufferARB", + "IsEnabled", + "IsEnabledi", + "IsEnabledIndexedEXT", + "IsFramebuffer", + "IsFramebufferEXT", + "IsList", + "IsProgram", + "IsProgramARB", + "IsProgramNV", + "IsQuery", + "IsQueryARB", + "IsRenderbuffer", + "IsRenderbufferEXT", + "IsSampler", + "IsShader", + "IsSync", + "IsTexture", + "IsTextureEXT", + "IsTransformFeedback", + "IsVertexArray", + "Lightf", + "Lightfv", + "Lighti", + "Lightiv", + "LightModelf", + "LightModelfv", + "LightModeli", + "LightModeliv", + "LightModelx", + "LightModelxv", + "Lightx", + "Lightxv", + "LineStipple", + "LineWidth", + "LineWidthx", + "LinkProgram", + "LinkProgramARB", + "ListBase", + "LoadIdentity", + "LoadMatrixd", + "LoadMatrixf", + "LoadMatrixx", + "LoadName", + "LoadProgramNV", + "LoadTransposeMatrixd", + "LoadTransposeMatrixdARB", + "LoadTransposeMatrixf", + "LoadTransposeMatrixfARB", + "LockArraysEXT", + "LogicOp", + "Map1d", + "Map1f", + "Map2d", + "Map2f", + "MapBuffer", + "MapBufferARB", + "MapBufferRange", + "MapGrid1d", + "MapGrid1f", + "MapGrid2d", + "MapGrid2f", + "MapNamedBuffer", + "MapNamedBufferRange", + "Materialf", + "Materialfv", + "Materiali", + "Materialiv", + "Materialx", + "Materialxv", + "MatrixMode", + "MemoryBarrier", + "Minmax", + "MinSampleShading", + "MinSampleShadingARB", + "MultiDrawArrays", + "MultiDrawArraysEXT", + "MultiDrawArraysIndirect", + "MultiDrawElements", + "MultiDrawElementsBaseVertex", + "MultiDrawElementsEXT", + "MultiDrawElementsIndirect", + "MultiTexCoord1d", + "MultiTexCoord1dARB", + "MultiTexCoord1dv", + "MultiTexCoord1dvARB", + "MultiTexCoord1f", + "MultiTexCoord1fARB", + "MultiTexCoord1fv", + "MultiTexCoord1fvARB", + "MultiTexCoord1i", + "MultiTexCoord1iARB", + "MultiTexCoord1iv", + "MultiTexCoord1ivARB", + "MultiTexCoord1s", + "MultiTexCoord1sARB", + "MultiTexCoord1sv", + "MultiTexCoord1svARB", + 
"MultiTexCoord2d", + "MultiTexCoord2dARB", + "MultiTexCoord2dv", + "MultiTexCoord2dvARB", + "MultiTexCoord2f", + "MultiTexCoord2fARB", + "MultiTexCoord2fv", + "MultiTexCoord2fvARB", + "MultiTexCoord2i", + "MultiTexCoord2iARB", + "MultiTexCoord2iv", + "MultiTexCoord2ivARB", + "MultiTexCoord2s", + "MultiTexCoord2sARB", + "MultiTexCoord2sv", + "MultiTexCoord2svARB", + "MultiTexCoord3d", + "MultiTexCoord3dARB", + "MultiTexCoord3dv", + "MultiTexCoord3dvARB", + "MultiTexCoord3f", + "MultiTexCoord3fARB", + "MultiTexCoord3fv", + "MultiTexCoord3fvARB", + "MultiTexCoord3i", + "MultiTexCoord3iARB", + "MultiTexCoord3iv", + "MultiTexCoord3ivARB", + "MultiTexCoord3s", + "MultiTexCoord3sARB", + "MultiTexCoord3sv", + "MultiTexCoord3svARB", + "MultiTexCoord4d", + "MultiTexCoord4dARB", + "MultiTexCoord4dv", + "MultiTexCoord4dvARB", + "MultiTexCoord4f", + "MultiTexCoord4fARB", + "MultiTexCoord4fv", + "MultiTexCoord4fvARB", + "MultiTexCoord4i", + "MultiTexCoord4iARB", + "MultiTexCoord4iv", + "MultiTexCoord4ivARB", + "MultiTexCoord4s", + "MultiTexCoord4sARB", + "MultiTexCoord4sv", + "MultiTexCoord4svARB", + "MultiTexCoord4x", + "MultiTexCoordP1ui", + "MultiTexCoordP1uiv", + "MultiTexCoordP2ui", + "MultiTexCoordP2uiv", + "MultiTexCoordP3ui", + "MultiTexCoordP3uiv", + "MultiTexCoordP4ui", + "MultiTexCoordP4uiv", + "MultMatrixd", + "MultMatrixf", + "MultMatrixx", + "MultTransposeMatrixd", + "MultTransposeMatrixdARB", + "MultTransposeMatrixf", + "MultTransposeMatrixfARB", + "NamedBufferData", + "NamedBufferStorage", + "NamedBufferSubData", + "NamedRenderbufferStorage", + "NamedRenderbufferStorageMultisample", + "NewList", + "Normal3b", + "Normal3bv", + "Normal3d", + "Normal3dv", + "Normal3f", + "Normal3fv", + "Normal3i", + "Normal3iv", + "Normal3s", + "Normal3sv", + "Normal3x", + "NormalP3ui", + "NormalP3uiv", + "NormalPointer", + "NormalPointerEXT", + "ObjectLabel", + "ObjectPtrLabel", + "ObjectPurgeableAPPLE", + "ObjectUnpurgeableAPPLE", + "Ortho", + "Orthof", + "Orthox", + 
"PassTexCoordATI", + "PassThrough", + "PauseTransformFeedback", + "PixelMapfv", + "PixelMapuiv", + "PixelMapusv", + "PixelStoref", + "PixelStorei", + "PixelTransferf", + "PixelTransferi", + "PixelZoom", + "PointParameterf", + "PointParameterfARB", + "PointParameterfEXT", + "PointParameterfv", + "PointParameterfvARB", + "PointParameterfvEXT", + "PointParameteri", + "PointParameteriNV", + "PointParameteriv", + "PointParameterivNV", + "PointParameterx", + "PointParameterxv", + "PointSize", + "PointSizePointerOES", + "PointSizex", + "PolygonMode", + "PolygonOffset", + "PolygonOffsetClampEXT", + "PolygonOffsetEXT", + "PolygonOffsetx", + "PolygonStipple", + "PopAttrib", + "PopClientAttrib", + "PopDebugGroup", + "PopMatrix", + "PopName", + "PrimitiveRestartIndex", + "PrimitiveRestartIndexNV", + "PrimitiveRestartNV", + "PrioritizeTextures", + "PrioritizeTexturesEXT", + "ProgramBinary", + "ProgramEnvParameter4dARB", + "ProgramEnvParameter4dvARB", + "ProgramEnvParameter4fARB", + "ProgramEnvParameter4fvARB", + "ProgramLocalParameter4dARB", + "ProgramLocalParameter4dvARB", + "ProgramLocalParameter4fARB", + "ProgramLocalParameter4fvARB", + "ProgramNamedParameter4dNV", + "ProgramNamedParameter4dvNV", + "ProgramNamedParameter4fNV", + "ProgramNamedParameter4fvNV", + "ProgramParameter4dNV", + "ProgramParameter4dvNV", + "ProgramParameter4fNV", + "ProgramParameter4fvNV", + "ProgramParameteri", + "ProgramParameteriARB", + "ProgramParameters4dvNV", + "ProgramParameters4fvNV", + "ProgramStringARB", + "ProvokingVertex", + "ProvokingVertexEXT", + "PushAttrib", + "PushClientAttrib", + "PushDebugGroup", + "PushMatrix", + "PushName", + "RasterPos2d", + "RasterPos2dv", + "RasterPos2f", + "RasterPos2fv", + "RasterPos2i", + "RasterPos2iv", + "RasterPos2s", + "RasterPos2sv", + "RasterPos3d", + "RasterPos3dv", + "RasterPos3f", + "RasterPos3fv", + "RasterPos3i", + "RasterPos3iv", + "RasterPos3s", + "RasterPos3sv", + "RasterPos4d", + "RasterPos4dv", + "RasterPos4f", + "RasterPos4fv", + 
"RasterPos4i", + "RasterPos4iv", + "RasterPos4s", + "RasterPos4sv", + "ReadBuffer", + "ReadnPixelsARB", + "ReadPixels", + "Rectd", + "Rectdv", + "Rectf", + "Rectfv", + "Recti", + "Rectiv", + "Rects", + "Rectsv", + "ReleaseShaderCompiler", + "RenderbufferStorage", + "RenderbufferStorageEXT", + "RenderbufferStorageMultisample", + "RenderbufferStorageMultisampleEXT", + "RenderMode", + "RequestResidentProgramsNV", + "ResetHistogram", + "ResetMinmax", + "ResizeBuffersMESA", + "ResumeTransformFeedback", + "Rotated", + "Rotatef", + "Rotatex", + "SampleCoverage", + "SampleCoverageARB", + "SampleCoveragex", + "SampleMapATI", + "SampleMaski", + "SamplerParameterf", + "SamplerParameterfv", + "SamplerParameteri", + "SamplerParameterIiv", + "SamplerParameterIuiv", + "SamplerParameteriv", + "Scaled", + "Scalef", + "Scalex", + "Scissor", + "ScissorArrayv", + "ScissorIndexed", + "ScissorIndexedv", + "SecondaryColor3b", + "SecondaryColor3bEXT", + "SecondaryColor3bv", + "SecondaryColor3bvEXT", + "SecondaryColor3d", + "SecondaryColor3dEXT", + "SecondaryColor3dv", + "SecondaryColor3dvEXT", + "SecondaryColor3f", + "SecondaryColor3fEXT", + "SecondaryColor3fv", + "SecondaryColor3fvEXT", + "SecondaryColor3i", + "SecondaryColor3iEXT", + "SecondaryColor3iv", + "SecondaryColor3ivEXT", + "SecondaryColor3s", + "SecondaryColor3sEXT", + "SecondaryColor3sv", + "SecondaryColor3svEXT", + "SecondaryColor3ub", + "SecondaryColor3ubEXT", + "SecondaryColor3ubv", + "SecondaryColor3ubvEXT", + "SecondaryColor3ui", + "SecondaryColor3uiEXT", + "SecondaryColor3uiv", + "SecondaryColor3uivEXT", + "SecondaryColor3us", + "SecondaryColor3usEXT", + "SecondaryColor3usv", + "SecondaryColor3usvEXT", + "SecondaryColorP3ui", + "SecondaryColorP3uiv", + "SecondaryColorPointer", + "SecondaryColorPointerEXT", + "SelectBuffer", + "SelectPerfMonitorCountersAMD", + "SeparableFilter2D", + "SetFragmentShaderConstantATI", + "ShadeModel", + "ShaderBinary", + "ShaderSource", + "ShaderSourceARB", + "StencilFunc", + 
"StencilFuncSeparate", + "StencilMask", + "StencilMaskSeparate", + "StencilOp", + "StencilOpSeparate", + "TexBuffer", + "TexBufferARB", + "TexBufferRange", + "TexBumpParameterfvATI", + "TexBumpParameterivATI", + "TexCoord1d", + "TexCoord1dv", + "TexCoord1f", + "TexCoord1fv", + "TexCoord1i", + "TexCoord1iv", + "TexCoord1s", + "TexCoord1sv", + "TexCoord2d", + "TexCoord2dv", + "TexCoord2f", + "TexCoord2fv", + "TexCoord2i", + "TexCoord2iv", + "TexCoord2s", + "TexCoord2sv", + "TexCoord3d", + "TexCoord3dv", + "TexCoord3f", + "TexCoord3fv", + "TexCoord3i", + "TexCoord3iv", + "TexCoord3s", + "TexCoord3sv", + "TexCoord4d", + "TexCoord4dv", + "TexCoord4f", + "TexCoord4fv", + "TexCoord4i", + "TexCoord4iv", + "TexCoord4s", + "TexCoord4sv", + "TexCoordP1ui", + "TexCoordP1uiv", + "TexCoordP2ui", + "TexCoordP2uiv", + "TexCoordP3ui", + "TexCoordP3uiv", + "TexCoordP4ui", + "TexCoordP4uiv", + "TexCoordPointer", + "TexCoordPointerEXT", + "TexEnvf", + "TexEnvfv", + "TexEnvi", + "TexEnviv", + "TexEnvx", + "TexEnvxv", + "TexGend", + "TexGendv", + "TexGenf", + "TexGenfv", + "TexGeni", + "TexGeniv", + "TexImage1D", + "TexImage2D", + "TexImage2DMultisample", + "TexImage3D", + "TexImage3DEXT", + "TexImage3DMultisample", + "TexParameterf", + "TexParameterfv", + "TexParameteri", + "TexParameterIiv", + "TexParameterIivEXT", + "TexParameterIuiv", + "TexParameterIuivEXT", + "TexParameteriv", + "TexParameterx", + "TexParameterxv", + "TexStorage1D", + "TexStorage2D", + "TexStorage2DMultisample", + "TexStorage3D", + "TexStorage3DMultisample", + "TexSubImage1D", + "TexSubImage1DEXT", + "TexSubImage2D", + "TexSubImage2DEXT", + "TexSubImage3D", + "TexSubImage3DEXT", + "TextureBarrier", + "TextureBarrierNV", + "TextureBuffer", + "TextureBufferRange", + "TextureParameterf", + "TextureParameterfv", + "TextureParameteri", + "TextureParameterIiv", + "TextureParameterIuiv", + "TextureParameteriv", + "TextureStorage1D", + "TextureStorage1DEXT", + "TextureStorage2D", + "TextureStorage2DEXT", + 
"TextureStorage2DMultisample", + "TextureStorage3D", + "TextureStorage3DEXT", + "TextureStorage3DMultisample", + "TextureSubImage1D", + "TextureSubImage2D", + "TextureSubImage3D", + "TextureView", + "TrackMatrixNV", + "TransformFeedbackBufferBase", + "TransformFeedbackBufferRange", + "TransformFeedbackVaryings", + "TransformFeedbackVaryingsEXT", + "Translated", + "Translatef", + "Translatex", + "Uniform1d", + "Uniform1dv", + "Uniform1f", + "Uniform1fARB", + "Uniform1fv", + "Uniform1fvARB", + "Uniform1i", + "Uniform1iARB", + "Uniform1iv", + "Uniform1ivARB", + "Uniform1ui", + "Uniform1uiEXT", + "Uniform1uiv", + "Uniform1uivEXT", + "Uniform2d", + "Uniform2dv", + "Uniform2f", + "Uniform2fARB", + "Uniform2fv", + "Uniform2fvARB", + "Uniform2i", + "Uniform2iARB", + "Uniform2iv", + "Uniform2ivARB", + "Uniform2ui", + "Uniform2uiEXT", + "Uniform2uiv", + "Uniform2uivEXT", + "Uniform3d", + "Uniform3dv", + "Uniform3f", + "Uniform3fARB", + "Uniform3fv", + "Uniform3fvARB", + "Uniform3i", + "Uniform3iARB", + "Uniform3iv", + "Uniform3ivARB", + "Uniform3ui", + "Uniform3uiEXT", + "Uniform3uiv", + "Uniform3uivEXT", + "Uniform4d", + "Uniform4dv", + "Uniform4f", + "Uniform4fARB", + "Uniform4fv", + "Uniform4fvARB", + "Uniform4i", + "Uniform4iARB", + "Uniform4iv", + "Uniform4ivARB", + "Uniform4ui", + "Uniform4uiEXT", + "Uniform4uiv", + "Uniform4uivEXT", + "UniformBlockBinding", + "UniformMatrix2dv", + "UniformMatrix2fv", + "UniformMatrix2fvARB", + "UniformMatrix2x3dv", + "UniformMatrix2x3fv", + "UniformMatrix2x4dv", + "UniformMatrix2x4fv", + "UniformMatrix3dv", + "UniformMatrix3fv", + "UniformMatrix3fvARB", + "UniformMatrix3x2dv", + "UniformMatrix3x2fv", + "UniformMatrix3x4dv", + "UniformMatrix3x4fv", + "UniformMatrix4dv", + "UniformMatrix4fv", + "UniformMatrix4fvARB", + "UniformMatrix4x2dv", + "UniformMatrix4x2fv", + "UniformMatrix4x3dv", + "UniformMatrix4x3fv", + "UnlockArraysEXT", + "UnmapBuffer", + "UnmapBufferARB", + "UnmapNamedBuffer", + "UseProgram", + "UseProgramObjectARB", + 
"UseShaderProgramEXT", + "ValidateProgram", + "ValidateProgramARB", + "VDPAUFiniNV", + "VDPAUGetSurfaceivNV", + "VDPAUInitNV", + "VDPAUIsSurfaceNV", + "VDPAUMapSurfacesNV", + "VDPAURegisterOutputSurfaceNV", + "VDPAURegisterVideoSurfaceNV", + "VDPAUSurfaceAccessNV", + "VDPAUUnmapSurfacesNV", + "VDPAUUnregisterSurfaceNV", + "Vertex2d", + "Vertex2dv", + "Vertex2f", + "Vertex2fv", + "Vertex2i", + "Vertex2iv", + "Vertex2s", + "Vertex2sv", + "Vertex3d", + "Vertex3dv", + "Vertex3f", + "Vertex3fv", + "Vertex3i", + "Vertex3iv", + "Vertex3s", + "Vertex3sv", + "Vertex4d", + "Vertex4dv", + "Vertex4f", + "Vertex4fv", + "Vertex4i", + "Vertex4iv", + "Vertex4s", + "Vertex4sv", + "VertexArrayAttribBinding", + "VertexArrayAttribFormat", + "VertexArrayAttribIFormat", + "VertexArrayAttribLFormat", + "VertexArrayBindingDivisor", + "VertexArrayElementBuffer", + "VertexArrayVertexBuffer", + "VertexArrayVertexBuffers", + "VertexAttrib1d", + "VertexAttrib1dARB", + "VertexAttrib1dNV", + "VertexAttrib1dv", + "VertexAttrib1dvARB", + "VertexAttrib1dvNV", + "VertexAttrib1f", + "VertexAttrib1fARB", + "VertexAttrib1fNV", + "VertexAttrib1fv", + "VertexAttrib1fvARB", + "VertexAttrib1fvNV", + "VertexAttrib1s", + "VertexAttrib1sARB", + "VertexAttrib1sNV", + "VertexAttrib1sv", + "VertexAttrib1svARB", + "VertexAttrib1svNV", + "VertexAttrib2d", + "VertexAttrib2dARB", + "VertexAttrib2dNV", + "VertexAttrib2dv", + "VertexAttrib2dvARB", + "VertexAttrib2dvNV", + "VertexAttrib2f", + "VertexAttrib2fARB", + "VertexAttrib2fNV", + "VertexAttrib2fv", + "VertexAttrib2fvARB", + "VertexAttrib2fvNV", + "VertexAttrib2s", + "VertexAttrib2sARB", + "VertexAttrib2sNV", + "VertexAttrib2sv", + "VertexAttrib2svARB", + "VertexAttrib2svNV", + "VertexAttrib3d", + "VertexAttrib3dARB", + "VertexAttrib3dNV", + "VertexAttrib3dv", + "VertexAttrib3dvARB", + "VertexAttrib3dvNV", + "VertexAttrib3f", + "VertexAttrib3fARB", + "VertexAttrib3fNV", + "VertexAttrib3fv", + "VertexAttrib3fvARB", + "VertexAttrib3fvNV", + "VertexAttrib3s", + 
"VertexAttrib3sARB", + "VertexAttrib3sNV", + "VertexAttrib3sv", + "VertexAttrib3svARB", + "VertexAttrib3svNV", + "VertexAttrib4bv", + "VertexAttrib4bvARB", + "VertexAttrib4d", + "VertexAttrib4dARB", + "VertexAttrib4dNV", + "VertexAttrib4dv", + "VertexAttrib4dvARB", + "VertexAttrib4dvNV", + "VertexAttrib4f", + "VertexAttrib4fARB", + "VertexAttrib4fNV", + "VertexAttrib4fv", + "VertexAttrib4fvARB", + "VertexAttrib4fvNV", + "VertexAttrib4iv", + "VertexAttrib4ivARB", + "VertexAttrib4Nbv", + "VertexAttrib4NbvARB", + "VertexAttrib4Niv", + "VertexAttrib4NivARB", + "VertexAttrib4Nsv", + "VertexAttrib4NsvARB", + "VertexAttrib4Nub", + "VertexAttrib4NubARB", + "VertexAttrib4Nubv", + "VertexAttrib4NubvARB", + "VertexAttrib4Nuiv", + "VertexAttrib4NuivARB", + "VertexAttrib4Nusv", + "VertexAttrib4NusvARB", + "VertexAttrib4s", + "VertexAttrib4sARB", + "VertexAttrib4sNV", + "VertexAttrib4sv", + "VertexAttrib4svARB", + "VertexAttrib4svNV", + "VertexAttrib4ubNV", + "VertexAttrib4ubv", + "VertexAttrib4ubvARB", + "VertexAttrib4ubvNV", + "VertexAttrib4uiv", + "VertexAttrib4uivARB", + "VertexAttrib4usv", + "VertexAttrib4usvARB", + "VertexAttribBinding", + "VertexAttribDivisor", + "VertexAttribDivisorARB", + "VertexAttribFormat", + "VertexAttribI1i", + "VertexAttribI1iEXT", + "VertexAttribI1iv", + "VertexAttribI1ivEXT", + "VertexAttribI1ui", + "VertexAttribI1uiEXT", + "VertexAttribI1uiv", + "VertexAttribI1uivEXT", + "VertexAttribI2i", + "VertexAttribI2iEXT", + "VertexAttribI2iv", + "VertexAttribI2ivEXT", + "VertexAttribI2ui", + "VertexAttribI2uiEXT", + "VertexAttribI2uiv", + "VertexAttribI2uivEXT", + "VertexAttribI3i", + "VertexAttribI3iEXT", + "VertexAttribI3iv", + "VertexAttribI3ivEXT", + "VertexAttribI3ui", + "VertexAttribI3uiEXT", + "VertexAttribI3uiv", + "VertexAttribI3uivEXT", + "VertexAttribI4bv", + "VertexAttribI4bvEXT", + "VertexAttribI4i", + "VertexAttribI4iEXT", + "VertexAttribI4iv", + "VertexAttribI4ivEXT", + "VertexAttribI4sv", + "VertexAttribI4svEXT", + "VertexAttribI4ubv", + 
"VertexAttribI4ubvEXT", + "VertexAttribI4ui", + "VertexAttribI4uiEXT", + "VertexAttribI4uiv", + "VertexAttribI4uivEXT", + "VertexAttribI4usv", + "VertexAttribI4usvEXT", + "VertexAttribIFormat", + "VertexAttribIPointer", + "VertexAttribIPointerEXT", + "VertexAttribL1d", + "VertexAttribL1dv", + "VertexAttribL2d", + "VertexAttribL2dv", + "VertexAttribL3d", + "VertexAttribL3dv", + "VertexAttribL4d", + "VertexAttribL4dv", + "VertexAttribLFormat", + "VertexAttribLPointer", + "VertexAttribP1ui", + "VertexAttribP1uiv", + "VertexAttribP2ui", + "VertexAttribP2uiv", + "VertexAttribP3ui", + "VertexAttribP3uiv", + "VertexAttribP4ui", + "VertexAttribP4uiv", + "VertexAttribPointer", + "VertexAttribPointerARB", + "VertexAttribPointerNV", + "VertexAttribs1dvNV", + "VertexAttribs1fvNV", + "VertexAttribs1svNV", + "VertexAttribs2dvNV", + "VertexAttribs2fvNV", + "VertexAttribs2svNV", + "VertexAttribs3dvNV", + "VertexAttribs3fvNV", + "VertexAttribs3svNV", + "VertexAttribs4dvNV", + "VertexAttribs4fvNV", + "VertexAttribs4svNV", + "VertexAttribs4ubvNV", + "VertexBindingDivisor", + "VertexP2ui", + "VertexP2uiv", + "VertexP3ui", + "VertexP3uiv", + "VertexP4ui", + "VertexP4uiv", + "VertexPointer", + "VertexPointerEXT", + "Viewport", + "ViewportArrayv", + "ViewportIndexedf", + "ViewportIndexedfv", + "WaitSync", + "WindowPos2d", + "WindowPos2dARB", + "WindowPos2dMESA", + "WindowPos2dv", + "WindowPos2dvARB", + "WindowPos2dvMESA", + "WindowPos2f", + "WindowPos2fARB", + "WindowPos2fMESA", + "WindowPos2fv", + "WindowPos2fvARB", + "WindowPos2fvMESA", + "WindowPos2i", + "WindowPos2iARB", + "WindowPos2iMESA", + "WindowPos2iv", + "WindowPos2ivARB", + "WindowPos2ivMESA", + "WindowPos2s", + "WindowPos2sARB", + "WindowPos2sMESA", + "WindowPos2sv", + "WindowPos2svARB", + "WindowPos2svMESA", + "WindowPos3d", + "WindowPos3dARB", + "WindowPos3dMESA", + "WindowPos3dv", + "WindowPos3dvARB", + "WindowPos3dvMESA", + "WindowPos3f", + "WindowPos3fARB", + "WindowPos3fMESA", + "WindowPos3fv", + "WindowPos3fvARB", + 
"WindowPos3fvMESA", + "WindowPos3i", + "WindowPos3iARB", + "WindowPos3iMESA", + "WindowPos3iv", + "WindowPos3ivARB", + "WindowPos3ivMESA", + "WindowPos3s", + "WindowPos3sARB", + "WindowPos3sMESA", + "WindowPos3sv", + "WindowPos3svARB", + "WindowPos3svMESA", + "WindowPos4dMESA", + "WindowPos4dvMESA", + "WindowPos4fMESA", + "WindowPos4fvMESA", + "WindowPos4iMESA", + "WindowPos4ivMESA", + "WindowPos4sMESA", + "WindowPos4svMESA", +] From 44e67398cc5b7aed6f78807e485f600e9eb834bb Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 16:27:12 -0700 Subject: [PATCH 148/834] glapi: Remove all static_dispatch tags from the XML Changes generated by: cd src/mapi/glapi/gen for i in *.xml; do cat $i |\ sed 's/[[:space:]]*static_dispatch="[^"]*">/>/' |\ sed 's/[[:space:]]*static_dispatch="[^"]*"[[:space:]]*$//' |\ sed 's/[[:space:]]*static_dispatch="[^"]*"[[:space:]]*/ /' > x mv x $i done Comparing the output of nm libGL.so | grep ' T gl[^X]' | sed 's/.* T //' before and after this commit showed no differences. 
Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov Reviewed-by: Dylan Baker --- .../glapi/gen/APPLE_vertex_array_object.xml | 8 +- .../glapi/gen/ARB_internalformat_query.xml | 2 +- .../glapi/gen/ARB_separate_shader_objects.xml | 122 +++++++++--------- src/mapi/glapi/gen/EXT_framebuffer_object.xml | 2 +- .../glapi/gen/EXT_separate_shader_objects.xml | 88 ++++++------- src/mapi/glapi/gen/GL4x.xml | 8 +- .../glapi/gen/INTEL_performance_query.xml | 20 +-- src/mapi/glapi/gen/OES_fixed_point.xml | 84 ++++++------ src/mapi/glapi/gen/OES_single_precision.xml | 12 +- src/mapi/glapi/gen/es_EXT.xml | 118 ++++++++--------- src/mapi/glapi/gen/gl_API.xml | 112 ++++++++-------- 11 files changed, 288 insertions(+), 288 deletions(-) diff --git a/src/mapi/glapi/gen/APPLE_vertex_array_object.xml b/src/mapi/glapi/gen/APPLE_vertex_array_object.xml index 5eb53b14e9f..08c1b86e311 100644 --- a/src/mapi/glapi/gen/APPLE_vertex_array_object.xml +++ b/src/mapi/glapi/gen/APPLE_vertex_array_object.xml @@ -6,22 +6,22 @@ + deprecated="3.1"> - + + deprecated="3.1"> - + diff --git a/src/mapi/glapi/gen/ARB_internalformat_query.xml b/src/mapi/glapi/gen/ARB_internalformat_query.xml index 70a2a310975..9c3154f0561 100644 --- a/src/mapi/glapi/gen/ARB_internalformat_query.xml +++ b/src/mapi/glapi/gen/ARB_internalformat_query.xml @@ -8,7 +8,7 @@ - diff --git a/src/mapi/glapi/gen/ARB_separate_shader_objects.xml b/src/mapi/glapi/gen/ARB_separate_shader_objects.xml index 96ae2b9cb62..e2196b3c37f 100644 --- a/src/mapi/glapi/gen/ARB_separate_shader_objects.xml +++ b/src/mapi/glapi/gen/ARB_separate_shader_objects.xml @@ -15,69 +15,69 @@ - + - + - + - + - + - + - + - + - + - + - + - + @@ -85,25 +85,25 @@ - + - + - + - + @@ -111,25 +111,25 @@ - + - + - + - + @@ -137,170 +137,170 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -308,88 +308,88 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git 
a/src/mapi/glapi/gen/EXT_framebuffer_object.xml b/src/mapi/glapi/gen/EXT_framebuffer_object.xml index 2cf75bc6783..16e1a1fe67d 100644 --- a/src/mapi/glapi/gen/EXT_framebuffer_object.xml +++ b/src/mapi/glapi/gen/EXT_framebuffer_object.xml @@ -186,7 +186,7 @@ - + diff --git a/src/mapi/glapi/gen/EXT_separate_shader_objects.xml b/src/mapi/glapi/gen/EXT_separate_shader_objects.xml index c6163a193b8..0d32cb25a4e 100644 --- a/src/mapi/glapi/gen/EXT_separate_shader_objects.xml +++ b/src/mapi/glapi/gen/EXT_separate_shader_objects.xml @@ -31,65 +31,65 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -97,25 +97,25 @@ - + - + - + - + @@ -123,25 +123,25 @@ - + - + - + - + @@ -149,145 +149,145 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/src/mapi/glapi/gen/GL4x.xml b/src/mapi/glapi/gen/GL4x.xml index 848316e9eda..ac63d73f917 100644 --- a/src/mapi/glapi/gen/GL4x.xml +++ b/src/mapi/glapi/gen/GL4x.xml @@ -13,13 +13,13 @@ - + - + @@ -27,12 +27,12 @@ - + - + diff --git a/src/mapi/glapi/gen/INTEL_performance_query.xml b/src/mapi/glapi/gen/INTEL_performance_query.xml index 25cd1817f57..0f4d68718e3 100644 --- a/src/mapi/glapi/gen/INTEL_performance_query.xml +++ b/src/mapi/glapi/gen/INTEL_performance_query.xml @@ -5,21 +5,21 @@ - + - + - + - + @@ -29,7 +29,7 @@ - + @@ -43,24 +43,24 @@ - + - + - + - + - + diff --git a/src/mapi/glapi/gen/OES_fixed_point.xml b/src/mapi/glapi/gen/OES_fixed_point.xml index d62d6e2ec51..7f0b95d9130 100644 --- a/src/mapi/glapi/gen/OES_fixed_point.xml +++ b/src/mapi/glapi/gen/OES_fixed_point.xml @@ -13,13 +13,13 @@ - - @@ -27,12 +27,12 @@ - - @@ -40,24 +40,24 @@ - - + - - @@ -67,62 +67,62 @@ - - - - - - - - - - @@ -131,14 +131,14 @@ - - @@ -148,18 +148,18 @@ - - - @@ -167,41 +167,41 @@ - - - - - - @@ -209,39 +209,39 @@ - - - - - - @@ -249,25 +249,25 @@ + es1="1.0" alias="GetTexParameterxv"> - + es1="1.0" alias="PointParameterxv"> - @@ -275,21 +275,21 @@ - - - diff --git 
a/src/mapi/glapi/gen/OES_single_precision.xml b/src/mapi/glapi/gen/OES_single_precision.xml index 8346b64ee83..935113a5e0b 100644 --- a/src/mapi/glapi/gen/OES_single_precision.xml +++ b/src/mapi/glapi/gen/OES_single_precision.xml @@ -6,30 +6,30 @@ - - - - - @@ -39,7 +39,7 @@ - diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 3a2adeb0491..0b2d02b2d41 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -11,7 +11,7 @@ + es1="1.0"> @@ -25,7 +25,7 @@ + es1="1.0"> @@ -41,7 +41,7 @@ + es1="1.0"> @@ -72,7 +72,7 @@ - @@ -81,12 +81,12 @@ - - @@ -95,12 +95,12 @@ - - @@ -109,12 +109,12 @@ - - @@ -123,7 +123,7 @@ - @@ -178,38 +178,38 @@ + es1="1.0"> + es1="1.0"> + es1="1.0"> + es1="1.0"> @@ -218,7 +218,7 @@ + es1="1.0"> @@ -227,25 +227,25 @@ + es1="1.0"> + es1="1.0"> + es1="1.0"> + es1="1.0"> @@ -253,7 +253,7 @@ @@ -261,19 +261,19 @@ + es1="1.0"> + es1="1.0"> + es1="1.0"> @@ -308,17 +308,17 @@ + exec="skip"> - + exec="skip"> @@ -326,7 +326,7 @@ + exec="skip"> @@ -343,7 +343,7 @@ + es1="1.0" desktop="false"> @@ -358,7 +358,7 @@ - @@ -388,42 +388,42 @@ - - - - - - @@ -460,13 +460,13 @@ + es1="1.0" es2="2.0"> - @@ -474,7 +474,7 @@ + es1="1.0" es2="2.0"> @@ -506,7 +506,7 @@ + es2="2.0"> @@ -519,7 +519,7 @@ @@ -535,7 +535,7 @@ + es2="2.0"> @@ -548,7 +548,7 @@ + es2="2.0"> @@ -557,7 +557,7 @@ - @@ -572,7 +572,7 @@ + es2="2.0"> @@ -631,7 +631,7 @@ + es2="2.0"> @@ -640,7 +640,7 @@ + es2="2.0"> @@ -667,7 +667,7 @@ + offset="assign" desktop="false"> @@ -690,24 +690,24 @@ + es2="2.0"> + es2="2.0"> + es2="2.0"> + es2="2.0"> @@ -783,7 +783,7 @@ + es2="2.0"> @@ -792,7 +792,7 @@ + es2="2.0"> @@ -821,7 +821,7 @@ + es1="1.0" es2="2.0"> @@ -830,7 +830,7 @@ + es1="1.0" es2="2.0"> @@ -840,7 +840,7 @@ + es2="2.0"> diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index 4c23bd9f663..7f1b954d219 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -8189,17 +8189,17 @@ - + - + - + @@ 
-8669,7 +8669,7 @@ - + @@ -8678,21 +8678,21 @@ - + - + - + @@ -8701,38 +8701,38 @@ - + - + - + - + - + - + @@ -8804,7 +8804,7 @@ - + @@ -8813,7 +8813,7 @@ - + @@ -8823,31 +8823,31 @@ - + - + - + - + - + @@ -8855,7 +8855,7 @@ - + @@ -8864,7 +8864,7 @@ - + @@ -8872,21 +8872,21 @@ - + - + - + @@ -8896,7 +8896,7 @@ - + @@ -8968,7 +8968,7 @@ - + @@ -8977,19 +8977,19 @@ - + - + - + @@ -8997,7 +8997,7 @@ - + @@ -9005,14 +9005,14 @@ - + - + @@ -9170,14 +9170,14 @@ - - @@ -9589,7 +9589,7 @@ - + @@ -9598,7 +9598,7 @@ - + @@ -11248,7 +11248,7 @@ - + @@ -11257,7 +11257,7 @@ - + @@ -11354,12 +11354,12 @@ - + - + @@ -12548,7 +12548,7 @@ + deprecated="3.1"> @@ -12684,7 +12684,7 @@ - + @@ -12705,7 +12705,7 @@ - + @@ -12790,13 +12790,13 @@ + exec="skip"> + exec="skip"> @@ -12885,7 +12885,7 @@ - + @@ -12905,14 +12905,14 @@ - + + deprecated="3.1"> @@ -12923,7 +12923,7 @@ + deprecated="3.1"> @@ -12931,7 +12931,7 @@ + deprecated="3.1"> @@ -12943,12 +12943,12 @@ - + - + @@ -12956,7 +12956,7 @@ - + @@ -12984,12 +12984,12 @@ - + - + From 7a22e78704427bb34280cf2c6f3774c2d830bebb Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 16:43:15 -0700 Subject: [PATCH 149/834] glapi: Whitespace clean up after the previous commit Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov Reviewed-by: Dylan Baker --- .../glapi/gen/APPLE_vertex_array_object.xml | 6 +- .../glapi/gen/ARB_internalformat_query.xml | 3 +- src/mapi/glapi/gen/OES_fixed_point.xml | 114 ++++-------- src/mapi/glapi/gen/OES_single_precision.xml | 15 +- src/mapi/glapi/gen/es_EXT.xml | 162 +++++++----------- src/mapi/glapi/gen/gl_API.xml | 22 +-- 6 files changed, 112 insertions(+), 210 deletions(-) diff --git a/src/mapi/glapi/gen/APPLE_vertex_array_object.xml b/src/mapi/glapi/gen/APPLE_vertex_array_object.xml index 08c1b86e311..9410e4e51c7 100644 --- a/src/mapi/glapi/gen/APPLE_vertex_array_object.xml +++ b/src/mapi/glapi/gen/APPLE_vertex_array_object.xml @@ -5,8 +5,7 @@ - + @@ -15,8 +14,7 @@ - + diff --git 
a/src/mapi/glapi/gen/ARB_internalformat_query.xml b/src/mapi/glapi/gen/ARB_internalformat_query.xml index 9c3154f0561..48e3a169a19 100644 --- a/src/mapi/glapi/gen/ARB_internalformat_query.xml +++ b/src/mapi/glapi/gen/ARB_internalformat_query.xml @@ -8,8 +8,7 @@ - + diff --git a/src/mapi/glapi/gen/OES_fixed_point.xml b/src/mapi/glapi/gen/OES_fixed_point.xml index 7f0b95d9130..8f3bfd0693f 100644 --- a/src/mapi/glapi/gen/OES_fixed_point.xml +++ b/src/mapi/glapi/gen/OES_fixed_point.xml @@ -19,29 +19,25 @@ - + - + - + - + @@ -51,14 +47,12 @@ - + - + @@ -67,63 +61,53 @@ - + - + - + - + - + - + - + - + - + - + @@ -131,15 +115,13 @@ - + - + @@ -148,69 +130,59 @@ - + - + - + - + - + - + - + - + - + - + @@ -221,54 +193,46 @@ - + - + - + - + - + - + - + - + @@ -282,15 +246,13 @@ - + - + diff --git a/src/mapi/glapi/gen/OES_single_precision.xml b/src/mapi/glapi/gen/OES_single_precision.xml index 935113a5e0b..c6795042ee5 100644 --- a/src/mapi/glapi/gen/OES_single_precision.xml +++ b/src/mapi/glapi/gen/OES_single_precision.xml @@ -6,19 +6,16 @@ - + - + - + @@ -29,8 +26,7 @@ - + @@ -39,8 +35,7 @@ - + diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 0b2d02b2d41..1107b217230 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -11,7 +11,7 @@ + es1="1.0"> @@ -24,8 +24,7 @@ - + @@ -40,8 +39,7 @@ - + @@ -72,8 +70,7 @@ - + @@ -81,13 +78,11 @@ - + - + @@ -95,13 +90,11 @@ - + - + @@ -109,13 +102,11 @@ - + - + @@ -123,8 +114,7 @@ - + @@ -177,40 +167,35 @@ - + - + + alias="CheckFramebufferStatus" es1="1.0"> - + + es1="1.0"> + alias="FramebufferRenderbuffer" es1="1.0"> @@ -218,7 +203,7 @@ + es1="1.0"> @@ -226,26 +211,22 @@ - + - + - + + alias="GetFramebufferAttachmentParameteriv" es1="1.0"> @@ -253,27 +234,24 @@ + alias="GetRenderbufferParameteriv" es1="1.0"> - + - + + es1="1.0"> @@ -308,25 +286,23 @@ + exec="skip"> - + + exec="skip"> - + @@ -343,7 +319,7 @@ + es1="1.0" desktop="false"> @@ -358,8 +334,7 @@ - + @@ -388,43 +363,37 @@ 
- + - + - + - + - + - + @@ -460,21 +429,19 @@ + es1="1.0" es2="2.0"> - + - + @@ -506,7 +473,7 @@ + es2="2.0"> @@ -519,8 +486,7 @@ + alias="CompressedTexSubImage3D" es2="2.0"> @@ -534,8 +500,7 @@ - + @@ -548,7 +513,7 @@ + es2="2.0"> @@ -557,8 +522,7 @@ - + @@ -571,8 +535,7 @@ - + @@ -630,8 +593,7 @@ - + @@ -639,8 +601,7 @@ - + @@ -689,25 +650,21 @@ - + - + - + - + @@ -782,8 +739,7 @@ - + @@ -791,8 +747,7 @@ - + @@ -821,7 +776,7 @@ + es1="1.0" es2="2.0"> @@ -830,7 +785,7 @@ + es1="1.0" es2="2.0"> @@ -839,8 +794,7 @@ - + diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index 7f1b954d219..1f0411efaec 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -9170,15 +9170,13 @@ - + - + @@ -12547,8 +12545,7 @@ - + @@ -12789,14 +12786,12 @@ - + - + @@ -12911,8 +12906,7 @@ - + @@ -12923,7 +12917,7 @@ + deprecated="3.1"> @@ -12931,7 +12925,7 @@ + deprecated="3.1"> From ea54b3ea1adb19b55d6aa55f9afaa0eba3f4e865 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 17:17:11 -0700 Subject: [PATCH 150/834] glapi: Remove static_dispatch from the DTD Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- src/mapi/glapi/gen/gl_API.dtd | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mapi/glapi/gen/gl_API.dtd b/src/mapi/glapi/gen/gl_API.dtd index ab321fad0f8..298ba3c888a 100644 --- a/src/mapi/glapi/gen/gl_API.dtd +++ b/src/mapi/glapi/gen/gl_API.dtd @@ -34,7 +34,6 @@ Date: Tue, 12 May 2015 15:24:03 -0700 Subject: [PATCH 151/834] glapi: Remove static dispatch for functions that didn't exist in 10.5 Comparing the output of nm libGL.so | grep ' T gl[^X]' | sed 's/.* T //' between 10.5.5 and this commit, the only change is the removal of glFramebufferTextureFaceARB. This function was removed a couple commits previously. None of these functions are particularly new. If applications were not statically linking them with 10.5.5, there's approximately zero chance they will for 10.6. 
Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- src/mapi/glapi/gen/static_data.py | 81 ------------------------------- 1 file changed, 81 deletions(-) diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py index a1983ffcf35..b3cddc23386 100644 --- a/src/mapi/glapi/gen/static_data.py +++ b/src/mapi/glapi/gen/static_data.py @@ -540,8 +540,6 @@ functions = [ "ClearDepthf", "ClearDepthx", "ClearIndex", - "ClearNamedBufferData", - "ClearNamedBufferSubData", "ClearStencil", "ClearTexImage", "ClearTexSubImage", @@ -632,7 +630,6 @@ functions = [ "CopyConvolutionFilter1D", "CopyConvolutionFilter2D", "CopyImageSubData", - "CopyNamedBufferSubData", "CopyPixels", "CopyTexImage1D", "CopyTexImage1DEXT", @@ -647,19 +644,12 @@ functions = [ "CopyTextureSubImage1D", "CopyTextureSubImage2D", "CopyTextureSubImage3D", - "CreateBuffers", "CreateProgram", "CreateProgramObjectARB", - "CreateProgramPipelines", - "CreateQueries", - "CreateRenderbuffers", - "CreateSamplers", "CreateShader", "CreateShaderObjectARB", "CreateShaderProgramEXT", "CreateTextures", - "CreateTransformFeedbacks", - "CreateVertexArrays", "CullFace", "DebugMessageCallback", "DebugMessageCallbackARB", @@ -702,7 +692,6 @@ functions = [ "DisableClientState", "Disablei", "DisableIndexedEXT", - "DisableVertexArrayAttrib", "DisableVertexAttribArray", "DisableVertexAttribArrayARB", "DispatchCompute", @@ -745,7 +734,6 @@ functions = [ "EnableClientState", "Enablei", "EnableIndexedEXT", - "EnableVertexArrayAttrib", "EnableVertexAttribArray", "EnableVertexAttribArrayARB", "End", @@ -777,7 +765,6 @@ functions = [ "Finish", "Flush", "FlushMappedBufferRange", - "FlushMappedNamedBufferRange", "FogCoordd", "FogCoorddEXT", "FogCoorddv", @@ -907,11 +894,6 @@ functions = [ "GetMinmaxParameterfv", "GetMinmaxParameteriv", "GetMultisamplefv", - "GetNamedBufferParameteri64v", - "GetNamedBufferParameteriv", - "GetNamedBufferPointerv", - "GetNamedBufferSubData", - "GetNamedRenderbufferParameteriv", 
"GetnColorTableARB", "GetnCompressedTexImageARB", "GetnConvolutionFilterARB", @@ -951,7 +933,6 @@ functions = [ "GetProgramEnvParameterdvARB", "GetProgramEnvParameterfvARB", "GetProgramInfoLog", - "GetProgramInterfaceiv", "GetProgramiv", "GetProgramivARB", "GetProgramivNV", @@ -961,17 +942,8 @@ functions = [ "GetProgramNamedParameterfvNV", "GetProgramParameterdvNV", "GetProgramParameterfvNV", - "GetProgramResourceIndex", - "GetProgramResourceiv", - "GetProgramResourceLocation", - "GetProgramResourceLocationIndex", - "GetProgramResourceName", "GetProgramStringARB", "GetProgramStringNV", - "GetQueryBufferObjecti64v", - "GetQueryBufferObjectiv", - "GetQueryBufferObjectui64v", - "GetQueryBufferObjectuiv", "GetQueryIndexediv", "GetQueryiv", "GetQueryivARB", @@ -1020,13 +992,9 @@ functions = [ "GetTextureParameterIuiv", "GetTextureParameteriv", "GetTrackMatrixivNV", - "GetTransformFeedbacki64_v", - "GetTransformFeedbackiv", - "GetTransformFeedbacki_v", "GetTransformFeedbackVarying", "GetTransformFeedbackVaryingEXT", "GetUniformBlockIndex", - "GetUniformdv", "GetUniformfv", "GetUniformfvARB", "GetUniformIndices", @@ -1036,9 +1004,6 @@ functions = [ "GetUniformLocationARB", "GetUniformuiv", "GetUniformuivEXT", - "GetVertexArrayIndexed64iv", - "GetVertexArrayIndexediv", - "GetVertexArrayiv", "GetVertexAttribdv", "GetVertexAttribdvARB", "GetVertexAttribdvNV", @@ -1052,7 +1017,6 @@ functions = [ "GetVertexAttribiv", "GetVertexAttribivARB", "GetVertexAttribivNV", - "GetVertexAttribLdv", "GetVertexAttribPointerv", "GetVertexAttribPointervARB", "GetVertexAttribPointervNV", @@ -1142,8 +1106,6 @@ functions = [ "MapGrid1f", "MapGrid2d", "MapGrid2f", - "MapNamedBuffer", - "MapNamedBufferRange", "Materialf", "Materialfv", "Materiali", @@ -1242,11 +1204,6 @@ functions = [ "MultTransposeMatrixdARB", "MultTransposeMatrixf", "MultTransposeMatrixfARB", - "NamedBufferData", - "NamedBufferStorage", - "NamedBufferSubData", - "NamedRenderbufferStorage", - 
"NamedRenderbufferStorageMultisample", "NewList", "Normal3b", "Normal3bv", @@ -1547,7 +1504,6 @@ functions = [ "TextureBarrier", "TextureBarrierNV", "TextureBuffer", - "TextureBufferRange", "TextureParameterf", "TextureParameterfv", "TextureParameteri", @@ -1567,15 +1523,11 @@ functions = [ "TextureSubImage3D", "TextureView", "TrackMatrixNV", - "TransformFeedbackBufferBase", - "TransformFeedbackBufferRange", "TransformFeedbackVaryings", "TransformFeedbackVaryingsEXT", "Translated", "Translatef", "Translatex", - "Uniform1d", - "Uniform1dv", "Uniform1f", "Uniform1fARB", "Uniform1fv", @@ -1588,8 +1540,6 @@ functions = [ "Uniform1uiEXT", "Uniform1uiv", "Uniform1uivEXT", - "Uniform2d", - "Uniform2dv", "Uniform2f", "Uniform2fARB", "Uniform2fv", @@ -1602,8 +1552,6 @@ functions = [ "Uniform2uiEXT", "Uniform2uiv", "Uniform2uivEXT", - "Uniform3d", - "Uniform3dv", "Uniform3f", "Uniform3fARB", "Uniform3fv", @@ -1616,8 +1564,6 @@ functions = [ "Uniform3uiEXT", "Uniform3uiv", "Uniform3uivEXT", - "Uniform4d", - "Uniform4dv", "Uniform4f", "Uniform4fARB", "Uniform4fv", @@ -1631,31 +1577,21 @@ functions = [ "Uniform4uiv", "Uniform4uivEXT", "UniformBlockBinding", - "UniformMatrix2dv", "UniformMatrix2fv", "UniformMatrix2fvARB", - "UniformMatrix2x3dv", "UniformMatrix2x3fv", - "UniformMatrix2x4dv", "UniformMatrix2x4fv", - "UniformMatrix3dv", "UniformMatrix3fv", "UniformMatrix3fvARB", - "UniformMatrix3x2dv", "UniformMatrix3x2fv", - "UniformMatrix3x4dv", "UniformMatrix3x4fv", - "UniformMatrix4dv", "UniformMatrix4fv", "UniformMatrix4fvARB", - "UniformMatrix4x2dv", "UniformMatrix4x2fv", - "UniformMatrix4x3dv", "UniformMatrix4x3fv", "UnlockArraysEXT", "UnmapBuffer", "UnmapBufferARB", - "UnmapNamedBuffer", "UseProgram", "UseProgramObjectARB", "UseShaderProgramEXT", @@ -1695,14 +1631,6 @@ functions = [ "Vertex4iv", "Vertex4s", "Vertex4sv", - "VertexArrayAttribBinding", - "VertexArrayAttribFormat", - "VertexArrayAttribIFormat", - "VertexArrayAttribLFormat", - "VertexArrayBindingDivisor", - 
"VertexArrayElementBuffer", - "VertexArrayVertexBuffer", - "VertexArrayVertexBuffers", "VertexAttrib1d", "VertexAttrib1dARB", "VertexAttrib1dNV", @@ -1848,16 +1776,7 @@ functions = [ "VertexAttribIFormat", "VertexAttribIPointer", "VertexAttribIPointerEXT", - "VertexAttribL1d", - "VertexAttribL1dv", - "VertexAttribL2d", - "VertexAttribL2dv", - "VertexAttribL3d", - "VertexAttribL3dv", - "VertexAttribL4d", - "VertexAttribL4dv", "VertexAttribLFormat", - "VertexAttribLPointer", "VertexAttribP1ui", "VertexAttribP1uiv", "VertexAttribP2ui", From c1ad2bac71fab362e397219d7274af842ef83878 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 15:47:41 -0700 Subject: [PATCH 152/834] glapi: Remove static dispatch for functions that didn't exist in 10.4 Comparing the output of nm libGL.so | grep ' T gl[^X]' | sed 's/.* T //' between 10.4.7 and this commit, the only change is the removal of glFramebufferTextureFaceARB. This function was removed a couple commits previously. None of these functions are particularly new. If applications were not statically linking them with 10.4.7, there's approximately zero chance they will for 10.6. Almost all of these functions are for GL_ARB_direct_state_access. Since the whole DSA API wasn't statically exported (and the extension wasn't enabled!), I think there's exactly zero chance anyone linked against these symbols. 
Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- src/mapi/glapi/gen/static_data.py | 33 ------------------------------- 1 file changed, 33 deletions(-) diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py index b3cddc23386..9623db2af61 100644 --- a/src/mapi/glapi/gen/static_data.py +++ b/src/mapi/glapi/gen/static_data.py @@ -490,7 +490,6 @@ functions = [ "BindTexture", "BindTextureEXT", "BindTextures", - "BindTextureUnit", "BindTransformFeedback", "BindVertexArray", "BindVertexBuffer", @@ -615,9 +614,6 @@ functions = [ "CompressedTexSubImage2DARB", "CompressedTexSubImage3D", "CompressedTexSubImage3DARB", - "CompressedTextureSubImage1D", - "CompressedTextureSubImage2D", - "CompressedTextureSubImage3D", "ConvolutionFilter1D", "ConvolutionFilter2D", "ConvolutionParameterf", @@ -641,15 +637,11 @@ functions = [ "CopyTexSubImage2DEXT", "CopyTexSubImage3D", "CopyTexSubImage3DEXT", - "CopyTextureSubImage1D", - "CopyTextureSubImage2D", - "CopyTextureSubImage3D", "CreateProgram", "CreateProgramObjectARB", "CreateShader", "CreateShaderObjectARB", "CreateShaderProgramEXT", - "CreateTextures", "CullFace", "DebugMessageCallback", "DebugMessageCallbackARB", @@ -802,7 +794,6 @@ functions = [ "GenBuffersARB", "GenerateMipmap", "GenerateMipmapEXT", - "GenerateTextureMipmap", "GenFragmentShadersATI", "GenFramebuffers", "GenFramebuffersEXT", @@ -853,7 +844,6 @@ functions = [ "GetColorTableParameterivEXT", "GetCompressedTexImage", "GetCompressedTexImageARB", - "GetCompressedTextureImage", "GetConvolutionFilter", "GetConvolutionParameterfv", "GetConvolutionParameteriv", @@ -984,13 +974,6 @@ functions = [ "GetTexParameterIuivEXT", "GetTexParameteriv", "GetTexParameterxv", - "GetTextureImage", - "GetTextureLevelParameterfv", - "GetTextureLevelParameteriv", - "GetTextureParameterfv", - "GetTextureParameterIiv", - "GetTextureParameterIuiv", - "GetTextureParameteriv", "GetTrackMatrixivNV", "GetTransformFeedbackVarying", "GetTransformFeedbackVaryingEXT", 
@@ -1255,7 +1238,6 @@ functions = [ "PointSizex", "PolygonMode", "PolygonOffset", - "PolygonOffsetClampEXT", "PolygonOffsetEXT", "PolygonOffsetx", "PolygonStipple", @@ -1503,24 +1485,9 @@ functions = [ "TexSubImage3DEXT", "TextureBarrier", "TextureBarrierNV", - "TextureBuffer", - "TextureParameterf", - "TextureParameterfv", - "TextureParameteri", - "TextureParameterIiv", - "TextureParameterIuiv", - "TextureParameteriv", - "TextureStorage1D", "TextureStorage1DEXT", - "TextureStorage2D", "TextureStorage2DEXT", - "TextureStorage2DMultisample", - "TextureStorage3D", "TextureStorage3DEXT", - "TextureStorage3DMultisample", - "TextureSubImage1D", - "TextureSubImage2D", - "TextureSubImage3D", "TextureView", "TrackMatrixNV", "TransformFeedbackVaryings", From 90a1a4e2345fe25517f5189cca1188162992f39b Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 16:01:17 -0700 Subject: [PATCH 153/834] glapi: Remove static dispatch for functions that didn't exist in 10.3 Comparing the output of nm libGL.so | grep ' T gl[^X]' | sed 's/.* T //' between 10.3.7 and this commit, the only change is the removal of glFramebufferTextureFaceARB. This function was removed a couple commits previously. glClipControl was, at the time 10.3 shipped, a very new function. It was added by GL_ARB_clip_control. That extension was ratified by the Khronos Board of Promoters on August 7, 2014. It's less than a year old, and I don't think it's is likely that there are many applications using that extension... much less statically linking with the function. 
Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- src/mapi/glapi/gen/static_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py index 9623db2af61..ab9c605fe9e 100644 --- a/src/mapi/glapi/gen/static_data.py +++ b/src/mapi/glapi/gen/static_data.py @@ -545,7 +545,6 @@ functions = [ "ClientActiveTexture", "ClientActiveTextureARB", "ClientWaitSync", - "ClipControl", "ClipPlane", "ClipPlanef", "ClipPlanex", From 4adfc6ed31983bcf52e106469f22bb121a56532b Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 16:56:37 -0700 Subject: [PATCH 154/834] glapi: Remove static dispatch for functions that didn't exist in fglrx Comparing the output of nm -D arch/x86_64/usr/X11R6/lib64/fglrx/fglrx-libGL.so.1.2 |\ grep ' T gl[^X]' | sed 's/.* T //' between Catalyst 14.6 Beta and this commit, the only change is a bunch of functions that AMD exports that Mesa does not and some OpenGL ES 1.1 functions that Mesa exported but AMD does not. The OpenGL ES 1.1 functions (e.g., glAlphaFuncx) are added by extensions in desktop. Our infrastructure doesn't allow us to statically export a function in one lib and not in another. The GLES1 conformance tests expect to be able to link with these functions, so we have to export them. If a function is not statically exported by either of the major binary drivers on Linux, there is almost zero chance that any application statically links with it. As a side note... I find it odd that AMD exports glTextureBarrierNV but not glTextureBarrier. 
Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- src/mapi/glapi/gen/static_data.py | 121 ------------------------------ 1 file changed, 121 deletions(-) diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py index ab9c605fe9e..8489d0ecbda 100644 --- a/src/mapi/glapi/gen/static_data.py +++ b/src/mapi/glapi/gen/static_data.py @@ -437,7 +437,6 @@ offsets = { functions = [ "Accum", - "ActiveProgramEXT", "ActiveTexture", "ActiveTextureARB", "AlphaFragmentOp1ATI", @@ -445,7 +444,6 @@ functions = [ "AlphaFragmentOp3ATI", "AlphaFunc", "AlphaFuncx", - "AreProgramsResidentNV", "AreTexturesResident", "AreTexturesResidentEXT", "ArrayElement", @@ -482,7 +480,6 @@ functions = [ "BindImageTexture", "BindImageTextures", "BindProgramARB", - "BindProgramNV", "BindRenderbuffer", "BindRenderbufferEXT", "BindSampler", @@ -596,7 +593,6 @@ functions = [ "ColorPointerEXT", "ColorSubTable", "ColorTable", - "ColorTableEXT", "ColorTableParameterfv", "ColorTableParameteriv", "CompileShader", @@ -640,7 +636,6 @@ functions = [ "CreateProgramObjectARB", "CreateShader", "CreateShaderObjectARB", - "CreateShaderProgramEXT", "CullFace", "DebugMessageCallback", "DebugMessageCallbackARB", @@ -658,7 +653,6 @@ functions = [ "DeletePerfMonitorsAMD", "DeleteProgram", "DeleteProgramsARB", - "DeleteProgramsNV", "DeleteQueries", "DeleteQueriesARB", "DeleteRenderbuffers", @@ -719,8 +713,6 @@ functions = [ "EdgeFlagPointer", "EdgeFlagPointerEXT", "EdgeFlagv", - "EGLImageTargetRenderbufferStorageOES", - "EGLImageTargetTexture2DOES", "Enable", "EnableClientState", "Enablei", @@ -750,7 +742,6 @@ functions = [ "EvalMesh2", "EvalPoint1", "EvalPoint2", - "ExecuteProgramNV", "FeedbackBuffer", "FenceSync", "Finish", @@ -799,7 +790,6 @@ functions = [ "GenLists", "GenPerfMonitorsAMD", "GenProgramsARB", - "GenProgramsNV", "GenQueries", "GenQueriesARB", "GenRenderbuffers", @@ -836,11 +826,8 @@ functions = [ "GetClipPlanef", "GetClipPlanex", "GetColorTable", - "GetColorTableEXT", 
"GetColorTableParameterfv", - "GetColorTableParameterfvEXT", "GetColorTableParameteriv", - "GetColorTableParameterivEXT", "GetCompressedTexImage", "GetCompressedTexImageARB", "GetConvolutionFilter", @@ -903,7 +890,6 @@ functions = [ "GetnUniformuivARB", "GetObjectLabel", "GetObjectParameterfvARB", - "GetObjectParameterivAPPLE", "GetObjectParameterivARB", "GetObjectPtrLabel", "GetPerfMonitorCounterDataAMD", @@ -924,15 +910,9 @@ functions = [ "GetProgramInfoLog", "GetProgramiv", "GetProgramivARB", - "GetProgramivNV", "GetProgramLocalParameterdvARB", "GetProgramLocalParameterfvARB", - "GetProgramNamedParameterdvNV", - "GetProgramNamedParameterfvNV", - "GetProgramParameterdvNV", - "GetProgramParameterfvNV", "GetProgramStringARB", - "GetProgramStringNV", "GetQueryIndexediv", "GetQueryiv", "GetQueryivARB", @@ -973,7 +953,6 @@ functions = [ "GetTexParameterIuivEXT", "GetTexParameteriv", "GetTexParameterxv", - "GetTrackMatrixivNV", "GetTransformFeedbackVarying", "GetTransformFeedbackVaryingEXT", "GetUniformBlockIndex", @@ -988,20 +967,16 @@ functions = [ "GetUniformuivEXT", "GetVertexAttribdv", "GetVertexAttribdvARB", - "GetVertexAttribdvNV", "GetVertexAttribfv", "GetVertexAttribfvARB", - "GetVertexAttribfvNV", "GetVertexAttribIiv", "GetVertexAttribIivEXT", "GetVertexAttribIuiv", "GetVertexAttribIuivEXT", "GetVertexAttribiv", "GetVertexAttribivARB", - "GetVertexAttribivNV", "GetVertexAttribPointerv", "GetVertexAttribPointervARB", - "GetVertexAttribPointervNV", "Hint", "Histogram", "Indexd", @@ -1035,7 +1010,6 @@ functions = [ "IsList", "IsProgram", "IsProgramARB", - "IsProgramNV", "IsQuery", "IsQueryARB", "IsRenderbuffer", @@ -1070,7 +1044,6 @@ functions = [ "LoadMatrixf", "LoadMatrixx", "LoadName", - "LoadProgramNV", "LoadTransposeMatrixd", "LoadTransposeMatrixdARB", "LoadTransposeMatrixf", @@ -1204,8 +1177,6 @@ functions = [ "NormalPointerEXT", "ObjectLabel", "ObjectPtrLabel", - "ObjectPurgeableAPPLE", - "ObjectUnpurgeableAPPLE", "Ortho", "Orthof", "Orthox", @@ -1227,9 
+1198,7 @@ functions = [ "PointParameterfvARB", "PointParameterfvEXT", "PointParameteri", - "PointParameteriNV", "PointParameteriv", - "PointParameterivNV", "PointParameterx", "PointParameterxv", "PointSize", @@ -1237,7 +1206,6 @@ functions = [ "PointSizex", "PolygonMode", "PolygonOffset", - "PolygonOffsetEXT", "PolygonOffsetx", "PolygonStipple", "PopAttrib", @@ -1259,18 +1227,8 @@ functions = [ "ProgramLocalParameter4dvARB", "ProgramLocalParameter4fARB", "ProgramLocalParameter4fvARB", - "ProgramNamedParameter4dNV", - "ProgramNamedParameter4dvNV", - "ProgramNamedParameter4fNV", - "ProgramNamedParameter4fvNV", - "ProgramParameter4dNV", - "ProgramParameter4dvNV", - "ProgramParameter4fNV", - "ProgramParameter4fvNV", "ProgramParameteri", "ProgramParameteriARB", - "ProgramParameters4dvNV", - "ProgramParameters4fvNV", "ProgramStringARB", "ProvokingVertex", "ProvokingVertexEXT", @@ -1320,10 +1278,8 @@ functions = [ "RenderbufferStorageMultisample", "RenderbufferStorageMultisampleEXT", "RenderMode", - "RequestResidentProgramsNV", "ResetHistogram", "ResetMinmax", - "ResizeBuffersMESA", "ResumeTransformFeedback", "Rotated", "Rotatef", @@ -1482,13 +1438,11 @@ functions = [ "TexSubImage2DEXT", "TexSubImage3D", "TexSubImage3DEXT", - "TextureBarrier", "TextureBarrierNV", "TextureStorage1DEXT", "TextureStorage2DEXT", "TextureStorage3DEXT", "TextureView", - "TrackMatrixNV", "TransformFeedbackVaryings", "TransformFeedbackVaryingsEXT", "Translated", @@ -1560,19 +1514,8 @@ functions = [ "UnmapBufferARB", "UseProgram", "UseProgramObjectARB", - "UseShaderProgramEXT", "ValidateProgram", "ValidateProgramARB", - "VDPAUFiniNV", - "VDPAUGetSurfaceivNV", - "VDPAUInitNV", - "VDPAUIsSurfaceNV", - "VDPAUMapSurfacesNV", - "VDPAURegisterOutputSurfaceNV", - "VDPAURegisterVideoSurfaceNV", - "VDPAUSurfaceAccessNV", - "VDPAUUnmapSurfacesNV", - "VDPAUUnregisterSurfaceNV", "Vertex2d", "Vertex2dv", "Vertex2f", @@ -1599,72 +1542,50 @@ functions = [ "Vertex4sv", "VertexAttrib1d", "VertexAttrib1dARB", - 
"VertexAttrib1dNV", "VertexAttrib1dv", "VertexAttrib1dvARB", - "VertexAttrib1dvNV", "VertexAttrib1f", "VertexAttrib1fARB", - "VertexAttrib1fNV", "VertexAttrib1fv", "VertexAttrib1fvARB", - "VertexAttrib1fvNV", "VertexAttrib1s", "VertexAttrib1sARB", - "VertexAttrib1sNV", "VertexAttrib1sv", "VertexAttrib1svARB", - "VertexAttrib1svNV", "VertexAttrib2d", "VertexAttrib2dARB", - "VertexAttrib2dNV", "VertexAttrib2dv", "VertexAttrib2dvARB", - "VertexAttrib2dvNV", "VertexAttrib2f", "VertexAttrib2fARB", - "VertexAttrib2fNV", "VertexAttrib2fv", "VertexAttrib2fvARB", - "VertexAttrib2fvNV", "VertexAttrib2s", "VertexAttrib2sARB", - "VertexAttrib2sNV", "VertexAttrib2sv", "VertexAttrib2svARB", - "VertexAttrib2svNV", "VertexAttrib3d", "VertexAttrib3dARB", - "VertexAttrib3dNV", "VertexAttrib3dv", "VertexAttrib3dvARB", - "VertexAttrib3dvNV", "VertexAttrib3f", "VertexAttrib3fARB", - "VertexAttrib3fNV", "VertexAttrib3fv", "VertexAttrib3fvARB", - "VertexAttrib3fvNV", "VertexAttrib3s", "VertexAttrib3sARB", - "VertexAttrib3sNV", "VertexAttrib3sv", "VertexAttrib3svARB", - "VertexAttrib3svNV", "VertexAttrib4bv", "VertexAttrib4bvARB", "VertexAttrib4d", "VertexAttrib4dARB", - "VertexAttrib4dNV", "VertexAttrib4dv", "VertexAttrib4dvARB", - "VertexAttrib4dvNV", "VertexAttrib4f", "VertexAttrib4fARB", - "VertexAttrib4fNV", "VertexAttrib4fv", "VertexAttrib4fvARB", - "VertexAttrib4fvNV", "VertexAttrib4iv", "VertexAttrib4ivARB", "VertexAttrib4Nbv", @@ -1683,14 +1604,10 @@ functions = [ "VertexAttrib4NusvARB", "VertexAttrib4s", "VertexAttrib4sARB", - "VertexAttrib4sNV", "VertexAttrib4sv", "VertexAttrib4svARB", - "VertexAttrib4svNV", - "VertexAttrib4ubNV", "VertexAttrib4ubv", "VertexAttrib4ubvARB", - "VertexAttrib4ubvNV", "VertexAttrib4uiv", "VertexAttrib4uivARB", "VertexAttrib4usv", @@ -1753,20 +1670,6 @@ functions = [ "VertexAttribP4uiv", "VertexAttribPointer", "VertexAttribPointerARB", - "VertexAttribPointerNV", - "VertexAttribs1dvNV", - "VertexAttribs1fvNV", - "VertexAttribs1svNV", - 
"VertexAttribs2dvNV", - "VertexAttribs2fvNV", - "VertexAttribs2svNV", - "VertexAttribs3dvNV", - "VertexAttribs3fvNV", - "VertexAttribs3svNV", - "VertexAttribs4dvNV", - "VertexAttribs4fvNV", - "VertexAttribs4svNV", - "VertexAttribs4ubvNV", "VertexBindingDivisor", "VertexP2ui", "VertexP2uiv", @@ -1783,58 +1686,34 @@ functions = [ "WaitSync", "WindowPos2d", "WindowPos2dARB", - "WindowPos2dMESA", "WindowPos2dv", "WindowPos2dvARB", - "WindowPos2dvMESA", "WindowPos2f", "WindowPos2fARB", - "WindowPos2fMESA", "WindowPos2fv", "WindowPos2fvARB", - "WindowPos2fvMESA", "WindowPos2i", "WindowPos2iARB", - "WindowPos2iMESA", "WindowPos2iv", "WindowPos2ivARB", - "WindowPos2ivMESA", "WindowPos2s", "WindowPos2sARB", - "WindowPos2sMESA", "WindowPos2sv", "WindowPos2svARB", - "WindowPos2svMESA", "WindowPos3d", "WindowPos3dARB", - "WindowPos3dMESA", "WindowPos3dv", "WindowPos3dvARB", - "WindowPos3dvMESA", "WindowPos3f", "WindowPos3fARB", - "WindowPos3fMESA", "WindowPos3fv", "WindowPos3fvARB", - "WindowPos3fvMESA", "WindowPos3i", "WindowPos3iARB", - "WindowPos3iMESA", "WindowPos3iv", "WindowPos3ivARB", - "WindowPos3ivMESA", "WindowPos3s", "WindowPos3sARB", - "WindowPos3sMESA", "WindowPos3sv", "WindowPos3svARB", - "WindowPos3svMESA", - "WindowPos4dMESA", - "WindowPos4dvMESA", - "WindowPos4fMESA", - "WindowPos4fvMESA", - "WindowPos4iMESA", - "WindowPos4ivMESA", - "WindowPos4sMESA", - "WindowPos4svMESA", ] From d2ee60cd5286695f738117c87c24b04b08340c6b Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 17:27:26 -0700 Subject: [PATCH 155/834] glapi: Remove static dispatch for functions that didn't exist in NVIDIA Comparing the output of nm -D libGL.so.349.16 | grep ' T gl[^X]' | sed 's/.* T //' between Catalyst NVIDIA 349.16 and this commit, the only change is a bunch of functions that NVIDIA exports that Mesa does not. 
If a function is not statically exported by either of the major binary drivers on Linux, there is almost zero chance that any application statically links with it. Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- src/mapi/glapi/gen/static_data.py | 46 ------------------------------- 1 file changed, 46 deletions(-) diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py index 8489d0ecbda..cf909fc0349 100644 --- a/src/mapi/glapi/gen/static_data.py +++ b/src/mapi/glapi/gen/static_data.py @@ -439,9 +439,6 @@ functions = [ "Accum", "ActiveTexture", "ActiveTextureARB", - "AlphaFragmentOp1ATI", - "AlphaFragmentOp2ATI", - "AlphaFragmentOp3ATI", "AlphaFunc", "AlphaFuncx", "AreTexturesResident", @@ -453,28 +450,21 @@ functions = [ "Begin", "BeginConditionalRender", "BeginConditionalRenderNV", - "BeginFragmentShaderATI", - "BeginPerfMonitorAMD", "BeginQuery", "BeginQueryARB", "BeginQueryIndexed", "BeginTransformFeedback", - "BeginTransformFeedbackEXT", "BindAttribLocation", "BindAttribLocationARB", "BindBuffer", "BindBufferARB", "BindBufferBase", - "BindBufferBaseEXT", - "BindBufferOffsetEXT", "BindBufferRange", - "BindBufferRangeEXT", "BindBuffersBase", "BindBuffersRange", "BindFragDataLocation", "BindFragDataLocationEXT", "BindFragDataLocationIndexed", - "BindFragmentShaderATI", "BindFramebuffer", "BindFramebufferEXT", "BindImageTexture", @@ -497,17 +487,13 @@ functions = [ "BlendEquation", "BlendEquationEXT", "BlendEquationiARB", - "BlendEquationIndexedAMD", "BlendEquationSeparate", "BlendEquationSeparateiARB", - "BlendEquationSeparateIndexedAMD", "BlendFunc", "BlendFunciARB", - "BlendFuncIndexedAMD", "BlendFuncSeparate", "BlendFuncSeparateEXT", "BlendFuncSeparateiARB", - "BlendFuncSeparateIndexedAMD", "BlitFramebuffer", "BufferData", "BufferDataARB", @@ -578,9 +564,6 @@ functions = [ "Color4us", "Color4usv", "Color4x", - "ColorFragmentOp1ATI", - "ColorFragmentOp2ATI", - "ColorFragmentOp3ATI", "ColorMask", "ColorMaski", 
"ColorMaskIndexedEXT", @@ -623,13 +606,9 @@ functions = [ "CopyImageSubData", "CopyPixels", "CopyTexImage1D", - "CopyTexImage1DEXT", "CopyTexImage2D", - "CopyTexImage2DEXT", "CopyTexSubImage1D", - "CopyTexSubImage1DEXT", "CopyTexSubImage2D", - "CopyTexSubImage2DEXT", "CopyTexSubImage3D", "CopyTexSubImage3DEXT", "CreateProgram", @@ -645,12 +624,10 @@ functions = [ "DebugMessageInsertARB", "DeleteBuffers", "DeleteBuffersARB", - "DeleteFragmentShaderATI", "DeleteFramebuffers", "DeleteFramebuffersEXT", "DeleteLists", "DeleteObjectARB", - "DeletePerfMonitorsAMD", "DeleteProgram", "DeleteProgramsARB", "DeleteQueries", @@ -722,14 +699,11 @@ functions = [ "End", "EndConditionalRender", "EndConditionalRenderNV", - "EndFragmentShaderATI", "EndList", - "EndPerfMonitorAMD", "EndQuery", "EndQueryARB", "EndQueryIndexed", "EndTransformFeedback", - "EndTransformFeedbackEXT", "EvalCoord1d", "EvalCoord1dv", "EvalCoord1f", @@ -784,11 +758,9 @@ functions = [ "GenBuffersARB", "GenerateMipmap", "GenerateMipmapEXT", - "GenFragmentShadersATI", "GenFramebuffers", "GenFramebuffersEXT", "GenLists", - "GenPerfMonitorsAMD", "GenProgramsARB", "GenQueries", "GenQueriesARB", @@ -892,12 +864,6 @@ functions = [ "GetObjectParameterfvARB", "GetObjectParameterivARB", "GetObjectPtrLabel", - "GetPerfMonitorCounterDataAMD", - "GetPerfMonitorCounterInfoAMD", - "GetPerfMonitorCountersAMD", - "GetPerfMonitorCounterStringAMD", - "GetPerfMonitorGroupsAMD", - "GetPerfMonitorGroupStringAMD", "GetPixelMapfv", "GetPixelMapuiv", "GetPixelMapusv", @@ -935,8 +901,6 @@ functions = [ "GetString", "GetStringi", "GetSynciv", - "GetTexBumpParameterfvATI", - "GetTexBumpParameterivATI", "GetTexEnvfv", "GetTexEnviv", "GetTexEnvxv", @@ -954,7 +918,6 @@ functions = [ "GetTexParameteriv", "GetTexParameterxv", "GetTransformFeedbackVarying", - "GetTransformFeedbackVaryingEXT", "GetUniformBlockIndex", "GetUniformfv", "GetUniformfvARB", @@ -1180,7 +1143,6 @@ functions = [ "Ortho", "Orthof", "Orthox", - "PassTexCoordATI", 
"PassThrough", "PauseTransformFeedback", "PixelMapfv", @@ -1287,7 +1249,6 @@ functions = [ "SampleCoverage", "SampleCoverageARB", "SampleCoveragex", - "SampleMapATI", "SampleMaski", "SamplerParameterf", "SamplerParameterfv", @@ -1339,9 +1300,7 @@ functions = [ "SecondaryColorPointer", "SecondaryColorPointerEXT", "SelectBuffer", - "SelectPerfMonitorCountersAMD", "SeparableFilter2D", - "SetFragmentShaderConstantATI", "ShadeModel", "ShaderBinary", "ShaderSource", @@ -1355,8 +1314,6 @@ functions = [ "TexBuffer", "TexBufferARB", "TexBufferRange", - "TexBumpParameterfvATI", - "TexBumpParameterivATI", "TexCoord1d", "TexCoord1dv", "TexCoord1f", @@ -1433,9 +1390,7 @@ functions = [ "TexStorage3D", "TexStorage3DMultisample", "TexSubImage1D", - "TexSubImage1DEXT", "TexSubImage2D", - "TexSubImage2DEXT", "TexSubImage3D", "TexSubImage3DEXT", "TextureBarrierNV", @@ -1444,7 +1399,6 @@ functions = [ "TextureStorage3DEXT", "TextureView", "TransformFeedbackVaryings", - "TransformFeedbackVaryingsEXT", "Translated", "Translatef", "Translatex", From 0fe7eab8d9a8c984ceb7aa333715799840ea7ec0 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 18:09:13 -0700 Subject: [PATCH 156/834] glapi: Add a list of functions that are not used but still need dispatch slots Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- src/mapi/glapi/gen/static_data.py | 56 +++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py index cf909fc0349..142c503b0c5 100644 --- a/src/mapi/glapi/gen/static_data.py +++ b/src/mapi/glapi/gen/static_data.py @@ -1671,3 +1671,59 @@ functions = [ "WindowPos3sv", "WindowPos3svARB", ] + +"""Functions that need dispatch slots but are not used + +Some of these functions may have GLX protocol support (for +indirect-rendering). Other were used in previous versions of Mesa. 
They keep +slots in the dispatch table so that newer versions of libGL can still be used +with older drivers.""" +unused_functions = [ + # SGIS_multisample + "SampleMaskSGIS", + "SamplePatternSGIS", + + # NV_vertex_program + "AreProgramsResidentNV", + "ExecuteProgramNV", + "GetProgramParameterdvNV", + "GetProgramParameterfvNV", + "GetProgramivNV", + "GetProgramStringNV", + "GetTrackMatrixivNV", + "GetVertexAttribdvNV", + "GetVertexAttribfvNV", + "GetVertexAttribivNV", + "LoadProgramNV", + "ProgramParameters4dvNV", + "ProgramParameters4fvNV", + "RequestResidentProgramsNV", + "TrackMatrixNV", + "VertexAttribPointerNV", + + # MESA_resize_buffers + "ResizeBuffersMESA", + + # ATI_envmap_bumpmap + "TexBumpParameterfvATI", + "TexBumpParameterivATI", + "GetTexBumpParameterfvATI", + "GetTexBumpParameterivATI", + + # NV_fragment_program + "ProgramNamedParameter4fNV", + "ProgramNamedParameter4dNV", + "ProgramNamedParameter4fvNV", + "ProgramNamedParameter4dvNV", + "GetProgramNamedParameterfvNV", + "GetProgramNamedParameterdvNV", + + # APPLE_flush_buffer_range + "BufferParameteriAPPLE", + "FlushMappedBufferRangeAPPLE", + + # EXT_separate_shader_objects + "UseShaderProgramEXT", + "ActiveProgramEXT", + "CreateShaderProgramEXT", +] From 2b419e0db92248ca768d2d26aa2f8276c70356e2 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 8 May 2015 19:33:13 -0700 Subject: [PATCH 157/834] glapi: Use the offsets from static_data.py instead of from the XML Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov Reviewed-by: Dylan Baker --- src/mapi/glapi/gen/gl_XML.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/mapi/glapi/gen/gl_XML.py b/src/mapi/glapi/gen/gl_XML.py index 89b09f250dc..67aba81a74e 100644 --- a/src/mapi/glapi/gen/gl_XML.py +++ b/src/mapi/glapi/gen/gl_XML.py @@ -626,7 +626,7 @@ class gl_function( gl_item ): # Decimal('1.1') }. 
self.api_map = {} - self.assign_offset = 0 + self.assign_offset = False self.static_entry_points = [] @@ -685,23 +685,11 @@ class gl_function( gl_item ): # Only try to set the offset when a non-alias entry-point # is being processed. - offset = element.get( "offset" ) - if offset: - try: - o = int( offset ) - self.offset = o - except Exception, e: - self.offset = -1 - if offset == "assign": - self.assign_offset = 1 - - if self.offset == -1: - assert name not in static_data.offsets - else: - assert static_data.offsets[name] == self.offset + if name in static_data.offsets: + self.offset = static_data.offsets[name] else: - assert name not in static_data.offsets - + self.offset = -1 + self.assign_offset = self.exec_flavor != "skip" or name in static_data.unused_functions if not self.name: self.name = true_name From f507d33d4fd0834529b77c2e24271904a14247ac Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 19:15:09 -0700 Subject: [PATCH 158/834] glapi: Remove all offset tags from the XML Changes generated by: cd src/mapi/glapi/gen for i in *.xml; do cat $i |\ sed 's/[[:space:]]*offset="[^"]*">/>/' |\ sed 's/[[:space:]]*offset="[^"]*"[[:space:]]*$//' |\ sed 's/[[:space:]]*offset="[^"]*"[[:space:]]*/ /' > x mv x $i done Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- .../glapi/gen/AMD_performance_monitor.xml | 22 +- src/mapi/glapi/gen/APPLE_object_purgeable.xml | 6 +- .../glapi/gen/APPLE_vertex_array_object.xml | 4 +- src/mapi/glapi/gen/ARB_ES2_compatibility.xml | 10 +- src/mapi/glapi/gen/ARB_base_instance.xml | 6 +- .../glapi/gen/ARB_blend_func_extended.xml | 4 +- .../glapi/gen/ARB_clear_buffer_object.xml | 8 +- src/mapi/glapi/gen/ARB_clear_texture.xml | 4 +- src/mapi/glapi/gen/ARB_clip_control.xml | 2 +- src/mapi/glapi/gen/ARB_compute_shader.xml | 4 +- src/mapi/glapi/gen/ARB_copy_buffer.xml | 2 +- src/mapi/glapi/gen/ARB_copy_image.xml | 2 +- .../glapi/gen/ARB_direct_state_access.xml | 158 +- src/mapi/glapi/gen/ARB_draw_buffers_blend.xml | 8 +- 
.../gen/ARB_draw_elements_base_vertex.xml | 8 +- src/mapi/glapi/gen/ARB_draw_indirect.xml | 8 +- src/mapi/glapi/gen/ARB_draw_instanced.xml | 4 +- src/mapi/glapi/gen/ARB_framebuffer_object.xml | 40 +- src/mapi/glapi/gen/ARB_get_program_binary.xml | 6 +- src/mapi/glapi/gen/ARB_gpu_shader_fp64.xml | 36 +- .../glapi/gen/ARB_internalformat_query.xml | 2 +- src/mapi/glapi/gen/ARB_invalidate_subdata.xml | 12 +- src/mapi/glapi/gen/ARB_map_buffer_range.xml | 4 +- src/mapi/glapi/gen/ARB_multi_bind.xml | 12 +- .../glapi/gen/ARB_program_interface_query.xml | 12 +- src/mapi/glapi/gen/ARB_robustness.xml | 40 +- src/mapi/glapi/gen/ARB_sampler_objects.xml | 28 +- .../glapi/gen/ARB_separate_shader_objects.xml | 122 +- .../glapi/gen/ARB_shader_atomic_counters.xml | 2 +- .../glapi/gen/ARB_shader_image_load_store.xml | 4 +- src/mapi/glapi/gen/ARB_sync.xml | 14 +- .../glapi/gen/ARB_texture_buffer_range.xml | 2 +- .../glapi/gen/ARB_texture_multisample.xml | 8 +- src/mapi/glapi/gen/ARB_texture_storage.xml | 12 +- .../gen/ARB_texture_storage_multisample.xml | 4 +- src/mapi/glapi/gen/ARB_texture_view.xml | 2 +- .../glapi/gen/ARB_uniform_buffer_object.xml | 14 +- .../glapi/gen/ARB_vertex_array_object.xml | 8 +- .../glapi/gen/ARB_vertex_attrib_64bit.xml | 20 +- .../glapi/gen/ARB_vertex_attrib_binding.xml | 12 +- .../gen/ARB_vertex_type_2_10_10_10_rev.xml | 76 +- src/mapi/glapi/gen/ARB_viewport_array.xml | 20 +- src/mapi/glapi/gen/EXT_framebuffer_object.xml | 4 +- src/mapi/glapi/gen/EXT_gpu_shader4.xml | 28 +- src/mapi/glapi/gen/EXT_provoking_vertex.xml | 2 +- .../glapi/gen/EXT_separate_shader_objects.xml | 6 +- src/mapi/glapi/gen/EXT_texture_integer.xml | 4 +- src/mapi/glapi/gen/EXT_transform_feedback.xml | 16 +- src/mapi/glapi/gen/GL3x.xml | 100 +- src/mapi/glapi/gen/GL4x.xml | 2 +- .../glapi/gen/INTEL_performance_query.xml | 20 +- src/mapi/glapi/gen/KHR_debug.xml | 20 +- src/mapi/glapi/gen/NV_primitive_restart.xml | 2 +- src/mapi/glapi/gen/NV_texture_barrier.xml | 2 +- 
src/mapi/glapi/gen/NV_vdpau_interop.xml | 20 +- src/mapi/glapi/gen/OES_EGL_image.xml | 4 +- src/mapi/glapi/gen/OES_fixed_point.xml | 6 +- src/mapi/glapi/gen/es_EXT.xml | 22 +- src/mapi/glapi/gen/gl_API.xml | 1462 ++++++++--------- src/mapi/glapi/gen/gl_and_es_API.xml | 86 +- 60 files changed, 1289 insertions(+), 1289 deletions(-) diff --git a/src/mapi/glapi/gen/AMD_performance_monitor.xml b/src/mapi/glapi/gen/AMD_performance_monitor.xml index b96b263db03..41b52083883 100644 --- a/src/mapi/glapi/gen/AMD_performance_monitor.xml +++ b/src/mapi/glapi/gen/AMD_performance_monitor.xml @@ -5,13 +5,13 @@ - + - + @@ -19,14 +19,14 @@ - + - + @@ -34,24 +34,24 @@ - + - + - + - + @@ -59,15 +59,15 @@ - + - + - + diff --git a/src/mapi/glapi/gen/APPLE_object_purgeable.xml b/src/mapi/glapi/gen/APPLE_object_purgeable.xml index 62fa64ad212..829fc263156 100644 --- a/src/mapi/glapi/gen/APPLE_object_purgeable.xml +++ b/src/mapi/glapi/gen/APPLE_object_purgeable.xml @@ -13,21 +13,21 @@ - + - + - + diff --git a/src/mapi/glapi/gen/APPLE_vertex_array_object.xml b/src/mapi/glapi/gen/APPLE_vertex_array_object.xml index 9410e4e51c7..7312f9b35f0 100644 --- a/src/mapi/glapi/gen/APPLE_vertex_array_object.xml +++ b/src/mapi/glapi/gen/APPLE_vertex_array_object.xml @@ -5,7 +5,7 @@ - + @@ -14,7 +14,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_ES2_compatibility.xml b/src/mapi/glapi/gen/ARB_ES2_compatibility.xml index d157366122b..c96e71c44f2 100644 --- a/src/mapi/glapi/gen/ARB_ES2_compatibility.xml +++ b/src/mapi/glapi/gen/ARB_ES2_compatibility.xml @@ -18,17 +18,17 @@ - + - + - + @@ -45,11 +45,11 @@ - + - + diff --git a/src/mapi/glapi/gen/ARB_base_instance.xml b/src/mapi/glapi/gen/ARB_base_instance.xml index 1478e39700c..571e0a42848 100644 --- a/src/mapi/glapi/gen/ARB_base_instance.xml +++ b/src/mapi/glapi/gen/ARB_base_instance.xml @@ -8,7 +8,7 @@ - @@ -17,7 +17,7 @@ - @@ -27,7 +27,7 @@ - diff --git a/src/mapi/glapi/gen/ARB_blend_func_extended.xml b/src/mapi/glapi/gen/ARB_blend_func_extended.xml index 
32adcde7733..406140f7ddf 100644 --- a/src/mapi/glapi/gen/ARB_blend_func_extended.xml +++ b/src/mapi/glapi/gen/ARB_blend_func_extended.xml @@ -8,14 +8,14 @@ - + - + diff --git a/src/mapi/glapi/gen/ARB_clear_buffer_object.xml b/src/mapi/glapi/gen/ARB_clear_buffer_object.xml index cb97a0185b0..2284eacd656 100644 --- a/src/mapi/glapi/gen/ARB_clear_buffer_object.xml +++ b/src/mapi/glapi/gen/ARB_clear_buffer_object.xml @@ -8,7 +8,7 @@ - + @@ -16,7 +16,7 @@ - + @@ -26,7 +26,7 @@ - diff --git a/src/mapi/glapi/gen/ARB_compute_shader.xml b/src/mapi/glapi/gen/ARB_compute_shader.xml index 1db373e9901..78d352f1f37 100644 --- a/src/mapi/glapi/gen/ARB_compute_shader.xml +++ b/src/mapi/glapi/gen/ARB_compute_shader.xml @@ -26,13 +26,13 @@ - + - + diff --git a/src/mapi/glapi/gen/ARB_copy_buffer.xml b/src/mapi/glapi/gen/ARB_copy_buffer.xml index 6982ed1d26f..d1c6f1fecce 100644 --- a/src/mapi/glapi/gen/ARB_copy_buffer.xml +++ b/src/mapi/glapi/gen/ARB_copy_buffer.xml @@ -11,7 +11,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_copy_image.xml b/src/mapi/glapi/gen/ARB_copy_image.xml index 2fbd84557a9..af672cd38bf 100644 --- a/src/mapi/glapi/gen/ARB_copy_image.xml +++ b/src/mapi/glapi/gen/ARB_copy_image.xml @@ -5,7 +5,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index bb9baf5a3d0..4c8f73ede7d 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -9,18 +9,18 @@ - + - + - + @@ -28,20 +28,20 @@ - + - + - + @@ -50,33 +50,33 @@ - + - + - + - + - + @@ -84,7 +84,7 @@ - + @@ -92,7 +92,7 @@ - + @@ -102,13 +102,13 @@ - + - + @@ -116,36 +116,36 @@ - + - + - + - + - + - + @@ -283,19 +283,19 @@ - + - + - + @@ -303,7 +303,7 @@ - + @@ -311,19 +311,19 @@ - + - + - + @@ -331,14 +331,14 @@ - + - + @@ -346,7 +346,7 @@ - + @@ -355,7 +355,7 @@ - + @@ -364,7 +364,7 @@ - + @@ -374,7 +374,7 @@ - + @@ -384,7 +384,7 @@ - + @@ -396,7 +396,7 @@ - + @@ -410,7 +410,7 @@ - + @@ -420,7 
+420,7 @@ - + @@ -432,7 +432,7 @@ - + @@ -446,7 +446,7 @@ - + @@ -455,7 +455,7 @@ - + @@ -466,7 +466,7 @@ - + @@ -478,52 +478,52 @@ - + - + - + - + - + - + - + - + - + @@ -532,46 +532,46 @@ - + - + - + - + - + - + - + @@ -579,27 +579,27 @@ - + - + - + - + - + @@ -607,7 +607,7 @@ - + @@ -616,7 +616,7 @@ - + @@ -625,7 +625,7 @@ - + @@ -633,7 +633,7 @@ - + @@ -641,32 +641,32 @@ - + - + - + - + - + @@ -675,48 +675,48 @@ - + - + - + - + - + - + - + diff --git a/src/mapi/glapi/gen/ARB_draw_buffers_blend.xml b/src/mapi/glapi/gen/ARB_draw_buffers_blend.xml index 0b6947cc5f3..8c33fbf89b8 100644 --- a/src/mapi/glapi/gen/ARB_draw_buffers_blend.xml +++ b/src/mapi/glapi/gen/ARB_draw_buffers_blend.xml @@ -8,24 +8,24 @@ - + - + - + - + diff --git a/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml b/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml index 986654848ca..207ead3c37b 100644 --- a/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml +++ b/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml @@ -8,7 +8,7 @@ - + @@ -16,7 +16,7 @@ - @@ -27,7 +27,7 @@ - @@ -37,7 +37,7 @@ - diff --git a/src/mapi/glapi/gen/ARB_draw_indirect.xml b/src/mapi/glapi/gen/ARB_draw_indirect.xml index 7de03cd3502..2001eb00b59 100644 --- a/src/mapi/glapi/gen/ARB_draw_indirect.xml +++ b/src/mapi/glapi/gen/ARB_draw_indirect.xml @@ -8,12 +8,12 @@ - + - + @@ -24,14 +24,14 @@ - + - + diff --git a/src/mapi/glapi/gen/ARB_draw_instanced.xml b/src/mapi/glapi/gen/ARB_draw_instanced.xml index 7ee7629c706..b1c8221444a 100644 --- a/src/mapi/glapi/gen/ARB_draw_instanced.xml +++ b/src/mapi/glapi/gen/ARB_draw_instanced.xml @@ -8,14 +8,14 @@ - + - + diff --git a/src/mapi/glapi/gen/ARB_framebuffer_object.xml b/src/mapi/glapi/gen/ARB_framebuffer_object.xml index 999a8ef13ad..e9480b4ccd3 100644 --- a/src/mapi/glapi/gen/ARB_framebuffer_object.xml +++ b/src/mapi/glapi/gen/ARB_framebuffer_object.xml @@ -140,33 +140,33 @@ - + - + + es2="2.0"> - + + es2="2.0"> @@ -174,7 +174,7 @@ - + @@ -183,46 +183,46 @@ - + - + - + + 
es2="2.0"> - + + es2="2.0"> - + @@ -232,7 +232,7 @@ + es2="2.0"> @@ -241,7 +241,7 @@ - + @@ -251,7 +251,7 @@ - + @@ -261,7 +261,7 @@ + es2="2.0"> @@ -269,7 +269,7 @@ - + @@ -277,7 +277,7 @@ - + @@ -291,7 +291,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_get_program_binary.xml b/src/mapi/glapi/gen/ARB_get_program_binary.xml index e84d0678e72..25e0a37c84b 100644 --- a/src/mapi/glapi/gen/ARB_get_program_binary.xml +++ b/src/mapi/glapi/gen/ARB_get_program_binary.xml @@ -11,7 +11,7 @@ - + @@ -19,14 +19,14 @@ - + - + diff --git a/src/mapi/glapi/gen/ARB_gpu_shader_fp64.xml b/src/mapi/glapi/gen/ARB_gpu_shader_fp64.xml index 4f860ef8c69..fd1ad117e51 100644 --- a/src/mapi/glapi/gen/ARB_gpu_shader_fp64.xml +++ b/src/mapi/glapi/gen/ARB_gpu_shader_fp64.xml @@ -5,25 +5,25 @@ - + - + - + - + @@ -31,94 +31,94 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/src/mapi/glapi/gen/ARB_internalformat_query.xml b/src/mapi/glapi/gen/ARB_internalformat_query.xml index 48e3a169a19..16d14b1b7ab 100644 --- a/src/mapi/glapi/gen/ARB_internalformat_query.xml +++ b/src/mapi/glapi/gen/ARB_internalformat_query.xml @@ -8,7 +8,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_invalidate_subdata.xml b/src/mapi/glapi/gen/ARB_invalidate_subdata.xml index 31b515cdb85..052816ad7e0 100644 --- a/src/mapi/glapi/gen/ARB_invalidate_subdata.xml +++ b/src/mapi/glapi/gen/ARB_invalidate_subdata.xml @@ -3,7 +3,7 @@ - + @@ -14,22 +14,22 @@ - + - + - + - + @@ -39,7 +39,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_map_buffer_range.xml b/src/mapi/glapi/gen/ARB_map_buffer_range.xml index d8745044694..cf7b211482d 100644 --- a/src/mapi/glapi/gen/ARB_map_buffer_range.xml +++ b/src/mapi/glapi/gen/ARB_map_buffer_range.xml @@ -15,7 +15,7 @@ - + @@ -23,7 +23,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_multi_bind.xml b/src/mapi/glapi/gen/ARB_multi_bind.xml index 4f2f2a2596b..f42eaa28e96 100644 --- a/src/mapi/glapi/gen/ARB_multi_bind.xml +++ b/src/mapi/glapi/gen/ARB_multi_bind.xml @@ -7,14 +7,14 @@ - + - + @@ 
-23,25 +23,25 @@ - + - + - + - + diff --git a/src/mapi/glapi/gen/ARB_program_interface_query.xml b/src/mapi/glapi/gen/ARB_program_interface_query.xml index 59eb59c64d5..5b6d5cc59bd 100644 --- a/src/mapi/glapi/gen/ARB_program_interface_query.xml +++ b/src/mapi/glapi/gen/ARB_program_interface_query.xml @@ -56,21 +56,21 @@ - + - + - + @@ -79,7 +79,7 @@ - + @@ -90,14 +90,14 @@ - + - + diff --git a/src/mapi/glapi/gen/ARB_robustness.xml b/src/mapi/glapi/gen/ARB_robustness.xml index 65843149cd8..9b2f2f0a74c 100644 --- a/src/mapi/glapi/gen/ARB_robustness.xml +++ b/src/mapi/glapi/gen/ARB_robustness.xml @@ -20,26 +20,26 @@ - + - + - + - + @@ -48,19 +48,19 @@ - + - + - + @@ -68,12 +68,12 @@ - + - + @@ -82,7 +82,7 @@ - + @@ -95,7 +95,7 @@ - + @@ -103,7 +103,7 @@ - + @@ -111,7 +111,7 @@ - + @@ -122,7 +122,7 @@ - + @@ -131,7 +131,7 @@ - + @@ -142,7 +142,7 @@ - + @@ -151,28 +151,28 @@ - + - + - + - + diff --git a/src/mapi/glapi/gen/ARB_sampler_objects.xml b/src/mapi/glapi/gen/ARB_sampler_objects.xml index 9173dee5c73..bc69e97bde7 100644 --- a/src/mapi/glapi/gen/ARB_sampler_objects.xml +++ b/src/mapi/glapi/gen/ARB_sampler_objects.xml @@ -7,81 +7,81 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/src/mapi/glapi/gen/ARB_separate_shader_objects.xml b/src/mapi/glapi/gen/ARB_separate_shader_objects.xml index e2196b3c37f..c20ee4b50ff 100644 --- a/src/mapi/glapi/gen/ARB_separate_shader_objects.xml +++ b/src/mapi/glapi/gen/ARB_separate_shader_objects.xml @@ -15,69 +15,69 @@ - + - + - + - + - + - + - + - + - + - + - + - + @@ -85,25 +85,25 @@ - + - + - + - + @@ -111,25 +111,25 @@ - + - + - + - + @@ -137,170 +137,170 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -308,88 +308,88 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/src/mapi/glapi/gen/ARB_shader_atomic_counters.xml b/src/mapi/glapi/gen/ARB_shader_atomic_counters.xml index f3b74e9c28c..0b0b60fa7b2 100644 --- 
a/src/mapi/glapi/gen/ARB_shader_atomic_counters.xml +++ b/src/mapi/glapi/gen/ARB_shader_atomic_counters.xml @@ -35,7 +35,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_shader_image_load_store.xml b/src/mapi/glapi/gen/ARB_shader_image_load_store.xml index 7ccfca41d81..c6a97bf1878 100644 --- a/src/mapi/glapi/gen/ARB_shader_image_load_store.xml +++ b/src/mapi/glapi/gen/ARB_shader_image_load_store.xml @@ -70,7 +70,7 @@ - + @@ -80,7 +80,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_sync.xml b/src/mapi/glapi/gen/ARB_sync.xml index 58f1639c421..d8a1c34e0c8 100644 --- a/src/mapi/glapi/gen/ARB_sync.xml +++ b/src/mapi/glapi/gen/ARB_sync.xml @@ -39,40 +39,40 @@ --> - + - + - + - + - + - + - + diff --git a/src/mapi/glapi/gen/ARB_texture_buffer_range.xml b/src/mapi/glapi/gen/ARB_texture_buffer_range.xml index 2176c08efcb..36bcc49325f 100644 --- a/src/mapi/glapi/gen/ARB_texture_buffer_range.xml +++ b/src/mapi/glapi/gen/ARB_texture_buffer_range.xml @@ -9,7 +9,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_texture_multisample.xml b/src/mapi/glapi/gen/ARB_texture_multisample.xml index 1f65a8bcf80..d7cf2a30aa2 100644 --- a/src/mapi/glapi/gen/ARB_texture_multisample.xml +++ b/src/mapi/glapi/gen/ARB_texture_multisample.xml @@ -34,7 +34,7 @@ - + @@ -43,7 +43,7 @@ - + @@ -53,13 +53,13 @@ - + - + diff --git a/src/mapi/glapi/gen/ARB_texture_storage.xml b/src/mapi/glapi/gen/ARB_texture_storage.xml index 1d63e7ce3a4..7df39424157 100644 --- a/src/mapi/glapi/gen/ARB_texture_storage.xml +++ b/src/mapi/glapi/gen/ARB_texture_storage.xml @@ -10,14 +10,14 @@ - + - + @@ -25,7 +25,7 @@ - + @@ -34,7 +34,7 @@ - + @@ -42,7 +42,7 @@ - + @@ -51,7 +51,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml b/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml index 0f9d323d4ff..0d39fa235da 100644 --- a/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml +++ b/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml @@ -7,7 +7,7 @@ - + @@ -16,7 +16,7 @@ - + diff --git 
a/src/mapi/glapi/gen/ARB_texture_view.xml b/src/mapi/glapi/gen/ARB_texture_view.xml index 3e6b8c904ce..4215fc5bd54 100644 --- a/src/mapi/glapi/gen/ARB_texture_view.xml +++ b/src/mapi/glapi/gen/ARB_texture_view.xml @@ -7,7 +7,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_uniform_buffer_object.xml b/src/mapi/glapi/gen/ARB_uniform_buffer_object.xml index 11aacb03309..cf86bbb2f8a 100644 --- a/src/mapi/glapi/gen/ARB_uniform_buffer_object.xml +++ b/src/mapi/glapi/gen/ARB_uniform_buffer_object.xml @@ -39,14 +39,14 @@ - + - + @@ -54,7 +54,7 @@ - + @@ -62,20 +62,20 @@ - + - + - + @@ -86,7 +86,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_vertex_array_object.xml b/src/mapi/glapi/gen/ARB_vertex_array_object.xml index f2277d2bc28..4a392dbb427 100644 --- a/src/mapi/glapi/gen/ARB_vertex_array_object.xml +++ b/src/mapi/glapi/gen/ARB_vertex_array_object.xml @@ -10,21 +10,21 @@ - + - + - + - + diff --git a/src/mapi/glapi/gen/ARB_vertex_attrib_64bit.xml b/src/mapi/glapi/gen/ARB_vertex_attrib_64bit.xml index fc49f84b5ba..211642fc317 100644 --- a/src/mapi/glapi/gen/ARB_vertex_attrib_64bit.xml +++ b/src/mapi/glapi/gen/ARB_vertex_attrib_64bit.xml @@ -5,25 +5,25 @@ - + - + - + - + @@ -31,27 +31,27 @@ - + - + - + - + - + @@ -59,7 +59,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml b/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml index 7e62688779d..0f000639f1f 100644 --- a/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml +++ b/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml @@ -7,14 +7,14 @@ - + - + @@ -22,26 +22,26 @@ - + - + - + - + diff --git a/src/mapi/glapi/gen/ARB_vertex_type_2_10_10_10_rev.xml b/src/mapi/glapi/gen/ARB_vertex_type_2_10_10_10_rev.xml index 6c6090c1cf0..26d39f27f06 100644 --- a/src/mapi/glapi/gen/ARB_vertex_type_2_10_10_10_rev.xml +++ b/src/mapi/glapi/gen/ARB_vertex_type_2_10_10_10_rev.xml @@ -7,244 +7,244 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - + - + - + - + diff --git 
a/src/mapi/glapi/gen/ARB_viewport_array.xml b/src/mapi/glapi/gen/ARB_viewport_array.xml index e1c6c2d81ab..b20cf612cc3 100644 --- a/src/mapi/glapi/gen/ARB_viewport_array.xml +++ b/src/mapi/glapi/gen/ARB_viewport_array.xml @@ -21,54 +21,54 @@ - + - + - + - + - + - + - + - + - + - + diff --git a/src/mapi/glapi/gen/EXT_framebuffer_object.xml b/src/mapi/glapi/gen/EXT_framebuffer_object.xml index 16e1a1fe67d..9ae02915232 100644 --- a/src/mapi/glapi/gen/EXT_framebuffer_object.xml +++ b/src/mapi/glapi/gen/EXT_framebuffer_object.xml @@ -78,7 +78,7 @@ - + @@ -112,7 +112,7 @@ - + diff --git a/src/mapi/glapi/gen/EXT_gpu_shader4.xml b/src/mapi/glapi/gen/EXT_gpu_shader4.xml index d204c3fc7a1..b1f7eae2610 100644 --- a/src/mapi/glapi/gen/EXT_gpu_shader4.xml +++ b/src/mapi/glapi/gen/EXT_gpu_shader4.xml @@ -44,25 +44,25 @@ - + - + - + - + @@ -70,25 +70,25 @@ - + - + - + - + @@ -101,17 +101,17 @@ - + - + - + @@ -121,17 +121,17 @@ - + - + - + diff --git a/src/mapi/glapi/gen/EXT_provoking_vertex.xml b/src/mapi/glapi/gen/EXT_provoking_vertex.xml index eb872098d46..3d1ae771ebf 100644 --- a/src/mapi/glapi/gen/EXT_provoking_vertex.xml +++ b/src/mapi/glapi/gen/EXT_provoking_vertex.xml @@ -26,7 +26,7 @@ - + diff --git a/src/mapi/glapi/gen/EXT_separate_shader_objects.xml b/src/mapi/glapi/gen/EXT_separate_shader_objects.xml index 0d32cb25a4e..1fa699fbc68 100644 --- a/src/mapi/glapi/gen/EXT_separate_shader_objects.xml +++ b/src/mapi/glapi/gen/EXT_separate_shader_objects.xml @@ -8,16 +8,16 @@ - + - + - + diff --git a/src/mapi/glapi/gen/EXT_texture_integer.xml b/src/mapi/glapi/gen/EXT_texture_integer.xml index aca21f4b125..5f9ea2963d3 100644 --- a/src/mapi/glapi/gen/EXT_texture_integer.xml +++ b/src/mapi/glapi/gen/EXT_texture_integer.xml @@ -55,14 +55,14 @@ - + - + diff --git a/src/mapi/glapi/gen/EXT_transform_feedback.xml b/src/mapi/glapi/gen/EXT_transform_feedback.xml index 65259fc1fd4..2aa26ad7251 100644 --- a/src/mapi/glapi/gen/EXT_transform_feedback.xml +++ 
b/src/mapi/glapi/gen/EXT_transform_feedback.xml @@ -32,7 +32,7 @@ - + @@ -82,33 +82,33 @@ - + - + - + - + - + - + - + diff --git a/src/mapi/glapi/gen/GL3x.xml b/src/mapi/glapi/gen/GL3x.xml index 5078f7b6d91..348d5221fb5 100644 --- a/src/mapi/glapi/gen/GL3x.xml +++ b/src/mapi/glapi/gen/GL3x.xml @@ -119,38 +119,38 @@ - + - + - + - + - + - + @@ -158,7 +158,7 @@ - + @@ -166,29 +166,29 @@ - + - + - + - + - + @@ -196,26 +196,26 @@ - + - + - + - + - + @@ -223,20 +223,20 @@ - + - + - + @@ -248,17 +248,17 @@ - + - + - + @@ -266,13 +266,13 @@ - + - + @@ -330,7 +330,7 @@ - + @@ -350,7 +350,7 @@ - + @@ -370,51 +370,51 @@ - + - + - + - + - + - + - + - + - + @@ -422,25 +422,25 @@ - + - + - + - + @@ -448,25 +448,25 @@ - + - + - + - + @@ -557,13 +557,13 @@ - + - + @@ -595,19 +595,19 @@ - + - + - + @@ -625,7 +625,7 @@ - + diff --git a/src/mapi/glapi/gen/GL4x.xml b/src/mapi/glapi/gen/GL4x.xml index ac63d73f917..94ddfb72960 100644 --- a/src/mapi/glapi/gen/GL4x.xml +++ b/src/mapi/glapi/gen/GL4x.xml @@ -9,7 +9,7 @@ - + diff --git a/src/mapi/glapi/gen/INTEL_performance_query.xml b/src/mapi/glapi/gen/INTEL_performance_query.xml index 0f4d68718e3..9573cb1bd6e 100644 --- a/src/mapi/glapi/gen/INTEL_performance_query.xml +++ b/src/mapi/glapi/gen/INTEL_performance_query.xml @@ -5,21 +5,21 @@ - + - + - + - + @@ -29,7 +29,7 @@ - + @@ -43,24 +43,24 @@ - + - + - + - + - + diff --git a/src/mapi/glapi/gen/KHR_debug.xml b/src/mapi/glapi/gen/KHR_debug.xml index 48f7fa762bd..77956d61e38 100644 --- a/src/mapi/glapi/gen/KHR_debug.xml +++ b/src/mapi/glapi/gen/KHR_debug.xml @@ -73,7 +73,7 @@ - + @@ -82,7 +82,7 @@ - + @@ -91,12 +91,12 @@ - + - + @@ -108,23 +108,23 @@ - + - + - + - + @@ -132,13 +132,13 @@ - + - + diff --git a/src/mapi/glapi/gen/NV_primitive_restart.xml b/src/mapi/glapi/gen/NV_primitive_restart.xml index 39edafe31c6..62d40785255 100644 --- a/src/mapi/glapi/gen/NV_primitive_restart.xml +++ b/src/mapi/glapi/gen/NV_primitive_restart.xml @@ -11,7 +11,7 @@ - diff --git 
a/src/mapi/glapi/gen/NV_texture_barrier.xml b/src/mapi/glapi/gen/NV_texture_barrier.xml index 52b1a3c08da..6dc62b0d7a3 100644 --- a/src/mapi/glapi/gen/NV_texture_barrier.xml +++ b/src/mapi/glapi/gen/NV_texture_barrier.xml @@ -7,7 +7,7 @@ - + diff --git a/src/mapi/glapi/gen/NV_vdpau_interop.xml b/src/mapi/glapi/gen/NV_vdpau_interop.xml index 0b19e1a85f6..1b77605e4ad 100644 --- a/src/mapi/glapi/gen/NV_vdpau_interop.xml +++ b/src/mapi/glapi/gen/NV_vdpau_interop.xml @@ -5,14 +5,14 @@ - + - + - + @@ -20,7 +20,7 @@ - + @@ -28,16 +28,16 @@ - + - + - + @@ -45,17 +45,17 @@ - + - + - + diff --git a/src/mapi/glapi/gen/OES_EGL_image.xml b/src/mapi/glapi/gen/OES_EGL_image.xml index a995cad6c95..d294f071237 100644 --- a/src/mapi/glapi/gen/OES_EGL_image.xml +++ b/src/mapi/glapi/gen/OES_EGL_image.xml @@ -5,13 +5,13 @@ - - diff --git a/src/mapi/glapi/gen/OES_fixed_point.xml b/src/mapi/glapi/gen/OES_fixed_point.xml index 8f3bfd0693f..d179ef4632b 100644 --- a/src/mapi/glapi/gen/OES_fixed_point.xml +++ b/src/mapi/glapi/gen/OES_fixed_point.xml @@ -239,20 +239,20 @@ - - + - + diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 1107b217230..b521bbb9b57 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -70,7 +70,7 @@ - + @@ -78,11 +78,11 @@ - + - + @@ -90,11 +90,11 @@ - + - + @@ -102,11 +102,11 @@ - + - + @@ -114,7 +114,7 @@ - + @@ -318,7 +318,7 @@ - @@ -334,7 +334,7 @@ - + @@ -628,7 +628,7 @@ + desktop="false"> diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index 1f0411efaec..b8db8cd2b48 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -1109,51 +1109,51 @@ - + - + - + - + - + - + - + - + - + @@ -1164,103 +1164,103 @@ - - + - - + - - + - - + - - + - - + - - + - - + - @@ -1268,12 +1268,12 @@ - + - @@ -1281,12 +1281,12 @@ - + - @@ -1294,12 +1294,12 @@ - + - @@ -1307,12 +1307,12 @@ - + - @@ -1320,12 +1320,12 @@ - + - @@ -1333,12 +1333,12 @@ - + - @@ -1346,12 +1346,12 @@ - 
+ - @@ -1359,215 +1359,215 @@ - + - - + - + - + - + - - + - + - + - + - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - @@ -1575,12 +1575,12 @@ - + - @@ -1588,12 +1588,12 @@ - + - @@ -1601,12 +1601,12 @@ - + - @@ -1614,25 +1614,25 @@ - + - + - + - @@ -1640,174 +1640,174 @@ - + - + - + - + - + - - + - - - - + - - + - - + - - - - + - - + - - + - - - - + - - + - @@ -1815,12 +1815,12 @@ - + - @@ -1828,13 +1828,13 @@ - - @@ -1842,12 +1842,12 @@ - + - @@ -1855,106 +1855,106 @@ - + - - + - - - - + - - + - - + - - - - + - - + - @@ -1962,12 +1962,12 @@ - + - @@ -1975,13 +1975,13 @@ - - @@ -1989,12 +1989,12 @@ - + - @@ -2002,134 +2002,134 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - @@ -2137,37 +2137,37 @@ - + - + - + - + - + - + @@ -2175,40 +2175,40 @@ - + - + - + - + - + - + @@ -2220,7 +2220,7 @@ - + @@ -2233,129 +2233,129 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -2363,12 +2363,12 @@ - + - + @@ -2376,22 +2376,22 @@ - + - + - + - + @@ -2399,50 +2399,50 @@ - + - + - + - + - + - + - + - + - + - + @@ -2452,7 +2452,7 @@ - + @@ -2462,7 +2462,7 @@ - + @@ -2476,7 +2476,7 @@ - + @@ -2490,21 +2490,21 @@ - + - + - + @@ -2514,7 +2514,7 @@ - + @@ -2524,49 +2524,49 @@ - - + - - + - - + - - + - @@ -2574,13 +2574,13 @@ - - @@ -2590,106 +2590,106 @@ - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -2698,7 +2698,7 @@ - + @@ -2709,7 +2709,7 @@ - + @@ -2718,155 +2718,155 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -2875,21 +2875,21 @@ - + - + - + @@ -2897,7 +2897,7 @@ - + @@ -2905,25 +2905,25 @@ - + - + - + - + @@ -2933,36 +2933,36 @@ - + - + - + - + - + - + - + @@ -2972,15 +2972,15 @@ - + - + - + @@ -2988,7 +2988,7 @@ - + @@ -2996,35 +2996,35 @@ - + - + - + - + - + @@ -3207,13 +3207,13 @@ - - + @@ -3221,13 +3221,13 @@ - - @@ -3235,7 +3235,7 @@ - @@ -3244,45 +3244,45 @@ - + - + - + - + - + - 
+ - + @@ -3290,7 +3290,7 @@ - + @@ -3298,13 +3298,13 @@ - + - + @@ -3315,7 +3315,7 @@ - + @@ -3327,7 +3327,7 @@ - + @@ -3337,7 +3337,7 @@ - + @@ -3349,7 +3349,7 @@ - + @@ -3361,7 +3361,7 @@ - + @@ -3375,7 +3375,7 @@ - + @@ -3383,52 +3383,52 @@ - + - + - + - + - + - - + - + - + @@ -3781,7 +3781,7 @@ - + @@ -3789,12 +3789,12 @@ - + - @@ -3805,7 +3805,7 @@ - + @@ -3815,21 +3815,21 @@ - + - + - + @@ -3838,7 +3838,7 @@ - + @@ -3846,21 +3846,21 @@ - + - + - + @@ -3870,7 +3870,7 @@ - + @@ -3879,7 +3879,7 @@ - + @@ -3889,7 +3889,7 @@ - + @@ -3900,35 +3900,35 @@ - + - + - + - + - + @@ -3937,7 +3937,7 @@ - + @@ -3947,7 +3947,7 @@ - + @@ -3955,21 +3955,21 @@ - + - + - + @@ -3979,7 +3979,7 @@ - + @@ -3991,7 +3991,7 @@ - + @@ -4000,21 +4000,21 @@ - + - + - + @@ -4023,21 +4023,21 @@ - + - + - + @@ -4045,24 +4045,24 @@ - + - + - + - + @@ -4076,7 +4076,7 @@ - + @@ -4092,7 +4092,7 @@ - + @@ -4320,25 +4320,25 @@ + es2="2.0"> + es1="1.0" deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> @@ -4357,40 +4357,40 @@ + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> @@ -4410,35 +4410,35 @@ + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> @@ -4446,7 +4446,7 @@ + deprecated="3.1"> @@ -4467,7 +4467,7 @@ + deprecated="3.1"> @@ -4475,14 +4475,14 @@ + deprecated="3.1"> + deprecated="3.1"> @@ -4490,14 +4490,14 @@ + deprecated="3.1"> + deprecated="3.1"> @@ -4506,7 +4506,7 @@ + deprecated="3.1"> @@ -4528,7 +4528,7 @@ + deprecated="3.1"> @@ -4537,14 +4537,14 @@ + deprecated="3.1"> + deprecated="3.1"> @@ -4553,44 +4553,44 @@ + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + es2="2.0"> - + @@ -4604,7 +4604,7 @@ + es1="1.0" es2="2.0"> @@ -4616,7 +4616,7 @@ - + @@ -4627,7 +4627,7 @@ - + @@ -4643,7 +4643,7 @@ + es1="1.0" es2="2.0"> @@ -4656,7 +4656,7 @@ - + @@ -4667,7 +4667,7 @@ - + @@ -4790,7 +4790,7 @@ - + @@ -4806,24 
+4806,24 @@ - + - + + deprecated="3.1"> - + @@ -4839,52 +4839,52 @@ - + - + - + - + + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> @@ -4902,72 +4902,72 @@ + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> + deprecated="3.1"> @@ -4975,93 +4975,93 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -5132,13 +5132,13 @@ - + - + @@ -5147,7 +5147,7 @@ + es2="2.0"> @@ -5156,34 +5156,34 @@ + es2="2.0"> - + + es1="1.1" es2="2.0"> - + - + @@ -5191,69 +5191,69 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -5357,19 +5357,19 @@ + es2="2.0"> - + - + @@ -5377,7 +5377,7 @@ - + @@ -5385,72 +5385,72 @@ - + - + + es2="2.0"> - + - + - + - + - + - + - + + es2="2.0"> - + @@ -5461,7 +5461,7 @@ - + @@ -5472,7 +5472,7 @@ - + @@ -5480,21 +5480,21 @@ - + - + - + @@ -5502,14 +5502,14 @@ - + - + @@ -5517,7 +5517,7 @@ - + @@ -5526,28 +5526,28 @@ + es2="2.0"> - + - + - + @@ -5555,7 +5555,7 @@ - + @@ -5563,7 +5563,7 @@ - + @@ -5572,7 +5572,7 @@ + es2="2.0"> @@ -5580,24 +5580,24 @@ - + - + - + - + @@ -5605,30 +5605,30 @@ - + - + - + - + - + @@ -5637,25 +5637,25 @@ - + - + - + - + @@ -5664,71 +5664,71 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -5736,16 +5736,16 @@ - + - + - + @@ -5758,22 +5758,22 @@ - + - + - + - + @@ -5787,24 +5787,24 @@ - + - + - + - + @@ -5819,68 +5819,68 @@ - + - + - + - + - + - + - + - + - + - + - + - + @@ -5896,41 +5896,41 @@ - + - + - + - + - + - + + es2="2.0"> @@ -5971,42 +5971,42 @@ - + - + - + - + - + - + @@ -6081,13 +6081,13 @@ - - + @@ -6127,14 +6127,14 @@ - - + @@ -6177,7 +6177,7 @@ - @@ -6185,7 +6185,7 @@ - + @@ -6231,7 +6231,7 @@ - @@ -6240,7 +6240,7 @@ - + @@ -7108,13 +7108,13 @@ - - + @@ -7143,14 +7143,14 @@ - - + @@ -7181,7 +7181,7 @@ - @@ -7189,7 +7189,7 @@ - + @@ -7222,7 +7222,7 @@ - @@ -7231,7 +7231,7 @@ - + @@ -7332,7 +7332,7 @@ - + @@ -7340,31 
+7340,31 @@ - + - + - + - + - @@ -7374,7 +7374,7 @@ - @@ -7382,7 +7382,7 @@ - @@ -7392,7 +7392,7 @@ - @@ -7400,7 +7400,7 @@ - @@ -7410,7 +7410,7 @@ - @@ -7418,7 +7418,7 @@ - @@ -7428,7 +7428,7 @@ - @@ -7448,7 +7448,7 @@ the ARB_vertex_program protocol to unused padding. --> - @@ -7456,7 +7456,7 @@ - @@ -7464,7 +7464,7 @@ - @@ -7472,7 +7472,7 @@ - @@ -7481,14 +7481,14 @@ - + - + @@ -7774,24 +7774,24 @@ - + - + - + - + @@ -7808,12 +7808,12 @@ - + - + @@ -7952,21 +7952,21 @@ - + - + - + @@ -7974,7 +7974,7 @@ - + @@ -8189,17 +8189,17 @@ - + - + - + @@ -8219,25 +8219,25 @@ - - + - + - + @@ -8268,14 +8268,14 @@ - - @@ -8360,7 +8360,7 @@ - + @@ -8420,7 +8420,7 @@ - + @@ -9170,13 +9170,13 @@ - + - + @@ -9207,7 +9207,7 @@ - + @@ -9222,7 +9222,7 @@ - + @@ -9234,7 +9234,7 @@ - + @@ -9242,7 +9242,7 @@ - + @@ -9250,7 +9250,7 @@ - + @@ -9259,7 +9259,7 @@ - + @@ -9798,13 +9798,13 @@ - + - + @@ -10146,13 +10146,13 @@ - + - + @@ -10234,7 +10234,7 @@ - @@ -10246,12 +10246,12 @@ - - + @@ -11103,7 +11103,7 @@ - + @@ -11185,7 +11185,7 @@ - + @@ -11193,12 +11193,12 @@ - + - + @@ -11206,12 +11206,12 @@ - + - + @@ -11219,12 +11219,12 @@ - + - + @@ -11232,7 +11232,7 @@ - + @@ -11246,7 +11246,7 @@ - + @@ -11255,7 +11255,7 @@ - + @@ -11654,7 +11654,7 @@ - @@ -11673,7 +11673,7 @@ - @@ -11692,7 +11692,7 @@ of these functions. 
--> - @@ -11701,7 +11701,7 @@ - @@ -11710,7 +11710,7 @@ - @@ -11718,7 +11718,7 @@ - @@ -11726,7 +11726,7 @@ - @@ -11735,7 +11735,7 @@ - @@ -11743,7 +11743,7 @@ - @@ -11751,7 +11751,7 @@ - @@ -11770,7 +11770,7 @@ - @@ -11809,7 +11809,7 @@ - @@ -11818,7 +11818,7 @@ - @@ -11827,14 +11827,14 @@ - - @@ -11843,7 +11843,7 @@ - @@ -11853,32 +11853,32 @@ - - + - - + - @@ -11886,13 +11886,13 @@ - + - @@ -11901,27 +11901,27 @@ - + - - - @@ -11929,14 +11929,14 @@ - - @@ -11945,14 +11945,14 @@ - - @@ -11962,39 +11962,39 @@ - - - + - - + - @@ -12002,13 +12002,13 @@ - + - @@ -12017,13 +12017,13 @@ - + - @@ -12032,97 +12032,97 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -12191,22 +12191,22 @@ - + - + - + - + @@ -12325,45 +12325,45 @@ redudndant garbage. There are a lot of enums with the value 0x00000001. --> - + - + - + - + - + - + - + - + @@ -12374,7 +12374,7 @@ - + @@ -12388,7 +12388,7 @@ - + @@ -12405,7 +12405,7 @@ - + @@ -12415,7 +12415,7 @@ - + @@ -12428,7 +12428,7 @@ - + @@ -12444,7 +12444,7 @@ - @@ -12545,7 +12545,7 @@ - + @@ -12585,7 +12585,7 @@ - @@ -12597,7 +12597,7 @@ - @@ -12609,7 +12609,7 @@ - @@ -12618,7 +12618,7 @@ - @@ -12627,7 +12627,7 @@ - @@ -12636,7 +12636,7 @@ - @@ -12681,7 +12681,7 @@ - + @@ -12786,12 +12786,12 @@ - + - + @@ -12870,7 +12870,7 @@ - + @@ -12906,7 +12906,7 @@ - + @@ -12916,7 +12916,7 @@ - @@ -12924,7 +12924,7 @@ - diff --git a/src/mapi/glapi/gen/gl_and_es_API.xml b/src/mapi/glapi/gen/gl_and_es_API.xml index d158a6b31f7..fc152841028 100644 --- a/src/mapi/glapi/gen/gl_and_es_API.xml +++ b/src/mapi/glapi/gen/gl_and_es_API.xml @@ -18,45 +18,45 @@ - + - + - + - + - + - + - + - + @@ -65,53 +65,53 @@ - + - + - + - + - + - + - + - + - + - + @@ -119,13 +119,13 @@ - + - + @@ -134,59 +134,59 @@ - + - + - + - + - + - + - + - + - + - + @@ -195,7 +195,7 @@ - + @@ -207,68 +207,68 @@ - + - + - + - + - + - + - + - + - + - + - + - + From a75910071e33184211773a1f019b53b23f8762eb Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 
19:43:33 -0700 Subject: [PATCH 159/834] glapi: Whitespace clean up after the previous commit Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- src/mapi/glapi/gen/ARB_base_instance.xml | 9 +- .../gen/ARB_draw_elements_base_vertex.xml | 9 +- src/mapi/glapi/gen/ARB_framebuffer_object.xml | 18 +- .../gen/ARB_vertex_type_2_10_10_10_rev.xml | 90 +-- src/mapi/glapi/gen/NV_primitive_restart.xml | 4 +- src/mapi/glapi/gen/NV_texture_barrier.xml | 2 +- src/mapi/glapi/gen/NV_vdpau_interop.xml | 2 +- src/mapi/glapi/gen/OES_EGL_image.xml | 6 +- src/mapi/glapi/gen/OES_fixed_point.xml | 3 +- src/mapi/glapi/gen/es_EXT.xml | 6 +- src/mapi/glapi/gen/gl_API.xml | 517 ++++++------------ 11 files changed, 223 insertions(+), 443 deletions(-) diff --git a/src/mapi/glapi/gen/ARB_base_instance.xml b/src/mapi/glapi/gen/ARB_base_instance.xml index 571e0a42848..56de639e907 100644 --- a/src/mapi/glapi/gen/ARB_base_instance.xml +++ b/src/mapi/glapi/gen/ARB_base_instance.xml @@ -8,8 +8,7 @@ - + @@ -17,8 +16,7 @@ - + @@ -27,8 +25,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml b/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml index 207ead3c37b..120bda13dd8 100644 --- a/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml +++ b/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml @@ -16,8 +16,7 @@ - + @@ -27,8 +26,7 @@ - + @@ -37,8 +35,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_framebuffer_object.xml b/src/mapi/glapi/gen/ARB_framebuffer_object.xml index e9480b4ccd3..1573e7e969c 100644 --- a/src/mapi/glapi/gen/ARB_framebuffer_object.xml +++ b/src/mapi/glapi/gen/ARB_framebuffer_object.xml @@ -152,8 +152,7 @@ - + @@ -165,8 +164,7 @@ - + @@ -202,8 +200,7 @@ - + @@ -215,8 +212,7 @@ - + @@ -231,8 +227,7 @@ - + @@ -260,8 +255,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_vertex_type_2_10_10_10_rev.xml b/src/mapi/glapi/gen/ARB_vertex_type_2_10_10_10_rev.xml index 26d39f27f06..92ec6e167cb 100644 --- a/src/mapi/glapi/gen/ARB_vertex_type_2_10_10_10_rev.xml +++ 
b/src/mapi/glapi/gen/ARB_vertex_type_2_10_10_10_rev.xml @@ -7,190 +7,160 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/src/mapi/glapi/gen/NV_primitive_restart.xml b/src/mapi/glapi/gen/NV_primitive_restart.xml index 62d40785255..232665236b5 100644 --- a/src/mapi/glapi/gen/NV_primitive_restart.xml +++ b/src/mapi/glapi/gen/NV_primitive_restart.xml @@ -11,9 +11,7 @@ - - + diff --git a/src/mapi/glapi/gen/NV_texture_barrier.xml b/src/mapi/glapi/gen/NV_texture_barrier.xml index 6dc62b0d7a3..b4c361cc3e7 100644 --- a/src/mapi/glapi/gen/NV_texture_barrier.xml +++ b/src/mapi/glapi/gen/NV_texture_barrier.xml @@ -7,7 +7,7 @@ - + diff --git a/src/mapi/glapi/gen/NV_vdpau_interop.xml b/src/mapi/glapi/gen/NV_vdpau_interop.xml index 1b77605e4ad..ceef7bdc9ac 100644 --- a/src/mapi/glapi/gen/NV_vdpau_interop.xml +++ b/src/mapi/glapi/gen/NV_vdpau_interop.xml @@ -10,7 +10,7 @@ - + diff --git a/src/mapi/glapi/gen/OES_EGL_image.xml b/src/mapi/glapi/gen/OES_EGL_image.xml index d294f071237..c483e91f1df 100644 --- a/src/mapi/glapi/gen/OES_EGL_image.xml +++ b/src/mapi/glapi/gen/OES_EGL_image.xml @@ -5,14 +5,12 @@ - + - + diff --git a/src/mapi/glapi/gen/OES_fixed_point.xml b/src/mapi/glapi/gen/OES_fixed_point.xml index d179ef4632b..edd0acdba6f 100644 --- a/src/mapi/glapi/gen/OES_fixed_point.xml +++ b/src/mapi/glapi/gen/OES_fixed_point.xml @@ -13,8 +13,7 @@ - + diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index b521bbb9b57..642e3b319bb 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -318,8 +318,7 @@ - + @@ -627,8 +626,7 @@ - + diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index b8db8cd2b48..3090b9f7e02 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -1164,8 +1164,7 @@ - + @@ -1176,8 +1175,7 @@ - + @@ -1189,7 +1187,7 @@ + deprecated="3.1" exec="dynamic"> @@ -1200,8 +1198,7 @@ - + @@ 
-1212,8 +1209,7 @@ - + @@ -1224,8 +1220,7 @@ - + @@ -1236,8 +1231,7 @@ - + @@ -1248,8 +1242,7 @@ - + @@ -1260,8 +1253,7 @@ - + @@ -1273,8 +1265,7 @@ - + @@ -1299,8 +1290,7 @@ - + @@ -1312,8 +1302,7 @@ - + @@ -1338,8 +1327,7 @@ - + @@ -1351,8 +1339,7 @@ - + @@ -1415,8 +1402,7 @@ - + @@ -1427,8 +1413,7 @@ - + @@ -1451,8 +1436,7 @@ - + @@ -1463,8 +1447,7 @@ - + @@ -1475,8 +1458,7 @@ - + @@ -1486,8 +1468,7 @@ - + @@ -1497,8 +1478,7 @@ - + @@ -1508,8 +1488,7 @@ - + @@ -1519,8 +1498,7 @@ - + @@ -1531,8 +1509,7 @@ - + @@ -1543,8 +1520,7 @@ - + @@ -1555,8 +1531,7 @@ - + @@ -1567,8 +1542,7 @@ - + @@ -1580,8 +1554,7 @@ - + @@ -1593,8 +1566,7 @@ - + @@ -1606,8 +1578,7 @@ - + @@ -1632,8 +1603,7 @@ - + @@ -1672,8 +1642,7 @@ - + @@ -1687,14 +1656,12 @@ - + - + @@ -1703,8 +1670,7 @@ - + @@ -1713,8 +1679,7 @@ - + @@ -1730,14 +1695,12 @@ - + - + @@ -1747,8 +1710,7 @@ - + @@ -1758,8 +1720,7 @@ - + @@ -1777,14 +1738,12 @@ - + - + @@ -1795,8 +1754,7 @@ - + @@ -1807,8 +1765,7 @@ - + @@ -1828,14 +1785,12 @@ - + - + @@ -1847,8 +1802,7 @@ - + @@ -1860,8 +1814,7 @@ - + @@ -1877,14 +1830,12 @@ - + - + @@ -1894,8 +1845,7 @@ - + @@ -1905,8 +1855,7 @@ - + @@ -1924,14 +1873,12 @@ - + - + @@ -1942,8 +1889,7 @@ - + @@ -1954,8 +1900,7 @@ - + @@ -1975,14 +1920,12 @@ - + - + @@ -1994,8 +1937,7 @@ - + @@ -2129,8 +2071,7 @@ - + @@ -2524,8 +2465,7 @@ - + @@ -2544,8 +2484,7 @@ - + @@ -2566,22 +2505,19 @@ - + - + - + @@ -2590,8 +2526,7 @@ - + @@ -3207,8 +3142,7 @@ - + @@ -3221,22 +3155,19 @@ - + - + - + @@ -3414,8 +3345,7 @@ - + @@ -3794,8 +3724,7 @@ - + @@ -4319,33 +4248,28 @@ - + - + - + - + - + @@ -4356,41 +4280,35 @@ - + - + - + - + - + - + @@ -4409,44 +4327,38 @@ - + - + - + - + - + - + @@ -4466,38 +4378,33 @@ - + - + - + - + - + @@ -4505,8 +4412,7 @@ - + @@ -4527,8 +4433,7 @@ - + @@ -4536,15 +4441,13 @@ - + - + @@ -4552,39 +4455,33 @@ - + - + - + - + - + - + @@ -4603,8 +4500,7 @@ - + @@ -4642,8 +4538,7 @@ - + @@ -4815,8 +4710,7 @@ - + @@ -4863,28 +4757,24 @@ - + - + - + - + @@ -4901,73 +4791,62 @@ 
- + - + - + - + - + - + - + - + - + - + - + @@ -5146,8 +5025,7 @@ - + @@ -5155,8 +5033,7 @@ - + @@ -5168,8 +5045,7 @@ - + @@ -5356,8 +5232,7 @@ - + @@ -5397,8 +5272,7 @@ - + @@ -5443,8 +5317,7 @@ - + @@ -5525,8 +5398,7 @@ - + @@ -5571,8 +5443,7 @@ - + @@ -5929,8 +5800,7 @@ - + @@ -7109,7 +6979,7 @@ + vectorequiv="VertexAttrib1fvARB" exec="dynamic"> @@ -7428,8 +7298,7 @@ - + @@ -7448,32 +7317,28 @@ the ARB_vertex_program protocol to unused padding. --> - + - + - + - + @@ -8219,8 +8084,7 @@ - + @@ -8268,15 +8132,13 @@ - + - + @@ -10234,8 +10096,7 @@ - + @@ -10246,8 +10107,7 @@ - + @@ -11654,8 +11514,7 @@ - + @@ -11673,8 +11532,7 @@ - + @@ -11692,8 +11550,7 @@ of these functions. --> - + @@ -11701,8 +11558,7 @@ - + @@ -11710,24 +11566,21 @@ - + - + - + @@ -11735,24 +11588,21 @@ - + - + - + @@ -11770,8 +11620,7 @@ - + @@ -11809,8 +11658,7 @@ - + @@ -11818,8 +11666,7 @@ - + @@ -11827,15 +11674,13 @@ - + - + @@ -11843,8 +11688,7 @@ - + @@ -11908,53 +11752,46 @@ + vectorequiv="VertexAttrib1fvNV" deprecated="3.1" exec="dynamic"> - + + vectorequiv="VertexAttrib2fvNV" deprecated="3.1" exec="dynamic"> - + + vectorequiv="VertexAttrib3fvNV" deprecated="3.1" exec="dynamic"> - + + vectorequiv="VertexAttrib4fvNV" deprecated="3.1" exec="dynamic"> @@ -11962,8 +11799,7 @@ - + @@ -12444,8 +12280,7 @@ - + @@ -12609,8 +12444,7 @@ - + @@ -12618,8 +12452,7 @@ - + @@ -12627,8 +12460,7 @@ - + @@ -12636,8 +12468,7 @@ - + @@ -12916,16 +12747,14 @@ - + - + From 35c28103b02598bb5f7b4888384b02d31ee371b5 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 12 May 2015 19:46:44 -0700 Subject: [PATCH 160/834] glapi: Remove offset from the DTD Signed-off-by: Ian Romanick Reviewed-by: Emil Velikov --- src/mapi/glapi/gen/gl_API.dtd | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mapi/glapi/gen/gl_API.dtd b/src/mapi/glapi/gen/gl_API.dtd index 298ba3c888a..bdc62f1c835 100644 --- a/src/mapi/glapi/gen/gl_API.dtd +++ b/src/mapi/glapi/gen/gl_API.dtd @@ -33,7 +33,6 @@ value NMTOKEN #REQUIRED> Date: 
Sat, 19 Jul 2014 09:26:09 -0400 Subject: [PATCH 161/834] gallium: add tessellation shader types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Marek: rename shader types Signed-off-by: Ilia Mirkin Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/tgsi/tgsi_info.c | 4 ++++ src/gallium/auxiliary/tgsi/tgsi_strings.c | 4 +++- src/gallium/auxiliary/tgsi/tgsi_strings.h | 2 +- src/gallium/include/pipe/p_defines.h | 6 ++++-- src/gallium/include/pipe/p_shader_tokens.h | 4 +++- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 3cab86efbfe..eb447cb6557 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -302,6 +302,10 @@ tgsi_get_processor_name( uint processor ) return "fragment shader"; case TGSI_PROCESSOR_GEOMETRY: return "geometry shader"; + case TGSI_PROCESSOR_TESSCTRL: + return "tessellation control shader"; + case TGSI_PROCESSOR_TESSEVAL: + return "tessellation evaluation shader"; default: return "unknown shader type!"; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index 9b727cf9a81..e4e5b90d5d9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -32,11 +32,13 @@ #include "tgsi_strings.h" -const char *tgsi_processor_type_names[4] = +const char *tgsi_processor_type_names[6] = { "FRAG", "VERT", "GEOM", + "TESS_CTRL", + "TESS_EVAL", "COMP" }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.h b/src/gallium/auxiliary/tgsi/tgsi_strings.h index 90014a225b0..71e74372f22 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.h +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.h @@ -38,7 +38,7 @@ extern "C" { #endif -extern const char *tgsi_processor_type_names[4]; +extern const char *tgsi_processor_type_names[6]; extern const char 
*tgsi_semantic_names[TGSI_SEMANTIC_COUNT]; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 0eb8a713c95..0938a5f688c 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -404,8 +404,10 @@ enum pipe_flush_flags #define PIPE_SHADER_VERTEX 0 #define PIPE_SHADER_FRAGMENT 1 #define PIPE_SHADER_GEOMETRY 2 -#define PIPE_SHADER_COMPUTE 3 -#define PIPE_SHADER_TYPES 4 +#define PIPE_SHADER_TESS_CTRL 3 +#define PIPE_SHADER_TESS_EVAL 4 +#define PIPE_SHADER_COMPUTE 5 +#define PIPE_SHADER_TYPES 6 /** diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index c14bcbca336..776b0d48342 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -43,7 +43,9 @@ struct tgsi_header #define TGSI_PROCESSOR_FRAGMENT 0 #define TGSI_PROCESSOR_VERTEX 1 #define TGSI_PROCESSOR_GEOMETRY 2 -#define TGSI_PROCESSOR_COMPUTE 3 +#define TGSI_PROCESSOR_TESSCTRL 3 +#define TGSI_PROCESSOR_TESSEVAL 4 +#define TGSI_PROCESSOR_COMPUTE 5 struct tgsi_processor { From 88c4f5d0a54baf7711cf030e3c1d5eca63560716 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 19 Jul 2014 09:27:46 -0400 Subject: [PATCH 162/834] gallium: add new PATCHES primitive type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilia Mirkin Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/tgsi/tgsi_strings.c | 3 ++- src/gallium/include/pipe/p_defines.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index e4e5b90d5d9..8938cf0b377 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -168,7 +168,8 @@ const char *tgsi_primitive_names[PIPE_PRIM_MAX] = "LINES_ADJACENCY", "LINE_STRIP_ADJACENCY", 
"TRIANGLES_ADJACENCY", - "TRIANGLE_STRIP_ADJACENCY" + "TRIANGLE_STRIP_ADJACENCY", + "PATCHES", }; const char *tgsi_fs_coord_origin_names[2] = diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 0938a5f688c..d116aa5b8f8 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -427,7 +427,8 @@ enum pipe_flush_flags #define PIPE_PRIM_LINE_STRIP_ADJACENCY 11 #define PIPE_PRIM_TRIANGLES_ADJACENCY 12 #define PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY 13 -#define PIPE_PRIM_MAX 14 +#define PIPE_PRIM_PATCHES 14 +#define PIPE_PRIM_MAX 15 /** From 018aa279532f5c9d07c3c73604e33b49569eb4e6 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 19 Jul 2014 09:39:18 -0400 Subject: [PATCH 163/834] gallium: add new semantics for tessellation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilia Mirkin Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/tgsi/tgsi_strings.c | 5 +++ src/gallium/docs/source/tgsi.rst | 37 ++++++++++++++++++++++ src/gallium/include/pipe/p_shader_tokens.h | 7 +++- 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index 8938cf0b377..fc16d471afb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -90,6 +90,11 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = "INVOCATIONID", "VERTEXID_NOBASE", "BASEVERTEX", + "PATCH", + "TESSCOORD", + "TESSOUTER", + "TESSINNER", + "VERTICESIN", }; const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] = diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 7771136f167..0116842bfda 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -2894,6 +2894,43 @@ and only the X component is used. 
FIXME: This right now can be either a ordinary input or a system value... +TGSI_SEMANTIC_PATCH +""""""""""""""""""" + +For tessellation evaluation/control shaders, this semantic label indicates a +generic per-patch attribute. Such semantics will not implicitly be per-vertex +arrays. + +TGSI_SEMANTIC_TESSCOORD +""""""""""""""""""""""" + +For tessellation evaluation shaders, this semantic label indicates the +coordinates of the vertex being processed. This is available in XYZ; W is +undefined. + +TGSI_SEMANTIC_TESSOUTER +""""""""""""""""""""""" + +For tessellation evaluation/control shaders, this semantic label indicates the +outer tessellation levels of the patch. Isoline tessellation will only have XY +defined, triangle will have XYZ and quads will have XYZW defined. This +corresponds to gl_TessLevelOuter. + +TGSI_SEMANTIC_TESSINNER +""""""""""""""""""""""" + +For tessellation evaluation/control shaders, this semantic label indicates the +inner tessellation levels of the patch. The X value is only defined for +triangle tessellation, while quads will have XY defined. This is entirely +undefined for isoline tessellation. + +TGSI_SEMANTIC_VERTICESIN +"""""""""""""""""""""""" + +For tessellation evaluation/control shaders, this semantic label indicates the +number of vertices provided in the input patch. Only the X value is defined. 
+ + Declaration Interpolate ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 776b0d48342..c6ab89924a9 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -180,7 +180,12 @@ struct tgsi_declaration_interp #define TGSI_SEMANTIC_INVOCATIONID 27 #define TGSI_SEMANTIC_VERTEXID_NOBASE 28 #define TGSI_SEMANTIC_BASEVERTEX 29 -#define TGSI_SEMANTIC_COUNT 30 /**< number of semantic values */ +#define TGSI_SEMANTIC_PATCH 30 /**< generic per-patch semantic */ +#define TGSI_SEMANTIC_TESSCOORD 31 /**< coordinate being processed by tess */ +#define TGSI_SEMANTIC_TESSOUTER 32 /**< outer tessellation levels */ +#define TGSI_SEMANTIC_TESSINNER 33 /**< inner tessellation levels */ +#define TGSI_SEMANTIC_VERTICESIN 34 /**< number of input vertices */ +#define TGSI_SEMANTIC_COUNT 35 /**< number of semantic values */ struct tgsi_declaration_semantic { From 7ffc1fb928268f8493e88d45e9a006208d05f0f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 19 Mar 2015 23:27:10 +0100 Subject: [PATCH 164/834] gallium: bump shader input and output limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/include/pipe/p_state.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index e15860c4ca7..e713a44c4b4 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -58,8 +58,8 @@ extern "C" { #define PIPE_MAX_COLOR_BUFS 8 #define PIPE_MAX_CONSTANT_BUFFERS 32 #define PIPE_MAX_SAMPLERS 18 /* 16 public + 2 driver internal */ -#define PIPE_MAX_SHADER_INPUTS 32 -#define PIPE_MAX_SHADER_OUTPUTS 48 /* 32 GENERICs + POS, PSIZE, FOG, etc. 
*/ +#define PIPE_MAX_SHADER_INPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */ +#define PIPE_MAX_SHADER_OUTPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */ #define PIPE_MAX_SHADER_SAMPLER_VIEWS 32 #define PIPE_MAX_SHADER_RESOURCES 32 #define PIPE_MAX_TEXTURE_LEVELS 16 From 18bce2f19437f9a6b86e33324cb559cd54470d51 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 11 Jul 2014 22:09:38 -0400 Subject: [PATCH 165/834] gallium: add interfaces for controlling tess program state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilia Mirkin Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/include/pipe/p_context.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 2d9f6d35dc9..67deb046e29 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -170,6 +170,16 @@ struct pipe_context { void (*bind_gs_state)(struct pipe_context *, void *); void (*delete_gs_state)(struct pipe_context *, void *); + void * (*create_tcs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*bind_tcs_state)(struct pipe_context *, void *); + void (*delete_tcs_state)(struct pipe_context *, void *); + + void * (*create_tes_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*bind_tes_state)(struct pipe_context *, void *); + void (*delete_tes_state)(struct pipe_context *, void *); + void * (*create_vertex_elements_state)(struct pipe_context *, unsigned num_elements, const struct pipe_vertex_element *); From 9e1ba1d6895ff1a80fe676e153799695653cfb2a Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 19 Jul 2014 10:09:28 -0400 Subject: [PATCH 166/834] gallium: add tessellation shader properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Marek: rename tess spacing definitions Signed-off-by: Ilia 
Mirkin Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/tgsi/tgsi_strings.c | 7 ++++- src/gallium/docs/source/tgsi.rst | 33 ++++++++++++++++++++++ src/gallium/include/pipe/p_defines.h | 7 +++++ src/gallium/include/pipe/p_shader_tokens.h | 7 ++++- 4 files changed, 52 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index fc16d471afb..6b6a14f55f5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -131,7 +131,12 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = "FS_DEPTH_LAYOUT", "VS_PROHIBIT_UCPS", "GS_INVOCATIONS", - "VS_WINDOW_SPACE_POSITION" + "VS_WINDOW_SPACE_POSITION", + "TCS_VERTICES_OUT", + "TES_PRIM_MODE", + "TES_SPACING", + "TES_VERTEX_ORDER_CW", + "TES_POINT_MODE", }; const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] = diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 0116842bfda..f77702aa6a9 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -3071,6 +3071,39 @@ Naturally, clipping is not performed on window coordinates either. The effect of this property is undefined if a geometry or tessellation shader are in use. +TCS_VERTICES_OUT +"""""""""""""""" + +The number of vertices written by the tessellation control shader. This +effectively defines the patch input size of the tessellation evaluation shader +as well. + +TES_PRIM_MODE +""""""""""""" + +This sets the tessellation primitive mode, one of ``PIPE_PRIM_TRIANGLES``, +``PIPE_PRIM_QUADS``, or ``PIPE_PRIM_LINES``. (Unlike in GL, there is no +separate isolines settings, the regular lines is assumed to mean isolines.) + +TES_SPACING +""""""""""" + +This sets the spacing mode of the tessellation generator, one of +``PIPE_TESS_SPACING_*``. 
+ +TES_VERTEX_ORDER_CW +""""""""""""""""""" + +This sets the vertex order to be clockwise if the value is 1, or +counter-clockwise if set to 0. + +TES_POINT_MODE +"""""""""""""" + +If set to a non-zero value, this turns on point mode for the tessellator, +which means that points will be generated instead of primitives. + + Texture Sampling and Texture Formats ------------------------------------ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index d116aa5b8f8..8fabf5e0ff7 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -431,6 +431,13 @@ enum pipe_flush_flags #define PIPE_PRIM_MAX 15 +/** + * Tessellator spacing types + */ +#define PIPE_TESS_SPACING_FRACTIONAL_ODD 0 +#define PIPE_TESS_SPACING_FRACTIONAL_EVEN 1 +#define PIPE_TESS_SPACING_EQUAL 2 + /** * Query object types */ diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index c6ab89924a9..ff1f7d6d21a 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -262,7 +262,12 @@ union tgsi_immediate_data #define TGSI_PROPERTY_VS_PROHIBIT_UCPS 7 #define TGSI_PROPERTY_GS_INVOCATIONS 8 #define TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION 9 -#define TGSI_PROPERTY_COUNT 10 +#define TGSI_PROPERTY_TCS_VERTICES_OUT 10 +#define TGSI_PROPERTY_TES_PRIM_MODE 11 +#define TGSI_PROPERTY_TES_SPACING 12 +#define TGSI_PROPERTY_TES_VERTEX_ORDER_CW 13 +#define TGSI_PROPERTY_TES_POINT_MODE 14 +#define TGSI_PROPERTY_COUNT 15 struct tgsi_property { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */ From 4dbfe6b6274760412cf4ea8e6a910874ca12d998 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 21 Jul 2014 18:43:53 -0400 Subject: [PATCH 167/834] gallium: add vertices_per_patch to draw info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilia Mirkin Reviewed-by: Roland Scheidegger Signed-off-by: 
Marek Olšák --- src/gallium/include/pipe/p_state.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index e713a44c4b4..e01c62c09a4 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -543,6 +543,8 @@ struct pipe_draw_info unsigned start_instance; /**< first instance id */ unsigned instance_count; /**< number of instances */ + unsigned vertices_per_patch; /**< the number of vertices per patch */ + /** * For indexed drawing, these fields apply after index lookup. */ From 6b262061203a6be8fe76402aa86e838254f937b8 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 20 Jul 2014 11:36:49 -0400 Subject: [PATCH 168/834] gallium: add set_tess_state to configure default tessellation parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilia Mirkin Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/docs/source/context.rst | 5 +++++ src/gallium/include/pipe/p_context.h | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 5861f46b30d..0908ee7e058 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -79,6 +79,11 @@ objects. They all follow simple, one-method binding calls, e.g. should be the same as the number of set viewports and can be up to PIPE_MAX_VIEWPORTS. * ``set_viewport_states`` +* ``set_tess_state`` configures the default tessellation parameters: + * ``default_outer_level`` is the default value for the outer tessellation + levels. This corresponds to GL's ``PATCH_DEFAULT_OUTER_LEVEL``. + * ``default_inner_level`` is the default value for the inner tessellation + levels. This corresponds to GL's ``PATCH_DEFAULT_INNER_LEVEL``. 
Sampler Views diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 67deb046e29..c25bfa638c1 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -231,6 +231,10 @@ struct pipe_context { unsigned start_slot, unsigned num_views, struct pipe_sampler_view **); + void (*set_tess_state)(struct pipe_context *, + float default_outer_level[4], + float default_inner_level[2]); + /** * Bind an array of shader resources that will be used by the * graphics pipeline. Any resources that were previously bound to From 369aca1b4a5dd9998fad4b660a8665a77ce4a02a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 11 Jan 2015 20:37:37 +0100 Subject: [PATCH 169/834] trace: implement new tessellation functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/drivers/trace/tr_context.c | 26 +++++++++++++++++++++++ src/gallium/drivers/trace/tr_dump_state.c | 2 ++ 2 files changed, 28 insertions(+) diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 0b56517e696..a1aeebdf65b 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -553,6 +553,8 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, TRACE_SHADER_STATE(fs) TRACE_SHADER_STATE(vs) TRACE_SHADER_STATE(gs) +TRACE_SHADER_STATE(tcs) +TRACE_SHADER_STATE(tes) #undef TRACE_SHADER_STATE @@ -1508,6 +1510,23 @@ static void trace_context_memory_barrier(struct pipe_context *_context, } +static void trace_context_set_tess_state(struct pipe_context *_context, + float default_outer_level[4], + float default_inner_level[2]) +{ + struct trace_context *tr_context = trace_context(_context); + struct pipe_context *context = tr_context->pipe; + + trace_dump_call_begin("pipe_context", "set_tess_state"); + 
trace_dump_arg(ptr, context); + trace_dump_arg_array(float, default_outer_level, 4); + trace_dump_arg_array(float, default_inner_level, 2); + trace_dump_call_end(); + + context->set_tess_state(context, default_outer_level, default_inner_level); +} + + static const struct debug_named_value rbug_blocker_flags[] = { {"before", 1, NULL}, {"after", 2, NULL}, @@ -1566,6 +1585,12 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(create_gs_state); TR_CTX_INIT(bind_gs_state); TR_CTX_INIT(delete_gs_state); + TR_CTX_INIT(create_tcs_state); + TR_CTX_INIT(bind_tcs_state); + TR_CTX_INIT(delete_tcs_state); + TR_CTX_INIT(create_tes_state); + TR_CTX_INIT(bind_tes_state); + TR_CTX_INIT(delete_tes_state); TR_CTX_INIT(create_vertex_elements_state); TR_CTX_INIT(bind_vertex_elements_state); TR_CTX_INIT(delete_vertex_elements_state); @@ -1597,6 +1622,7 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(flush); TR_CTX_INIT(texture_barrier); TR_CTX_INIT(memory_barrier); + TR_CTX_INIT(set_tess_state); TR_CTX_INIT(transfer_map); TR_CTX_INIT(transfer_unmap); diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 71273380434..9bf4a722d80 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -709,6 +709,8 @@ void trace_dump_draw_info(const struct pipe_draw_info *state) trace_dump_member(uint, state, start_instance); trace_dump_member(uint, state, instance_count); + trace_dump_member(uint, state, vertices_per_patch); + trace_dump_member(int, state, index_bias); trace_dump_member(uint, state, min_index); trace_dump_member(uint, state, max_index); From 66630290dfd90c095ed9268ad02f6a8930a8043f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 19 Mar 2015 23:26:54 +0100 Subject: [PATCH 170/834] gallium/util: print vertices_per_patch in util_dump_draw_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/util/u_dump_state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index e6614d5d22c..7f620b50cf0 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -750,6 +750,8 @@ util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state) util_dump_member(stream, uint, state, start_instance); util_dump_member(stream, uint, state, instance_count); + util_dump_member(stream, uint, state, vertices_per_patch); + util_dump_member(stream, int, state, index_bias); util_dump_member(stream, uint, state, min_index); util_dump_member(stream, uint, state, max_index); From 267ad27ab64956dff857b8584c4862da2e7dfc78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 23 Sep 2014 18:40:27 +0200 Subject: [PATCH 171/834] gallium/u_blitter: disable tessellation for all operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/util/u_blitter.c | 27 ++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_blitter.h | 16 ++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 24a5b93e199..16bf90fc9d6 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -130,6 +130,7 @@ struct blitter_context_priv unsigned dst_height; boolean has_geometry_shader; + boolean has_tessellation; boolean has_layered; boolean has_stream_out; boolean has_stencil_export; @@ -183,6 +184,11 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->has_geometry_shader = pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_GEOMETRY, PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 
0; + + ctx->has_tessellation = + pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_TESS_CTRL, + PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0; + ctx->has_stream_out = pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0; @@ -510,6 +516,8 @@ static void blitter_check_saved_vertex_states(struct blitter_context_priv *ctx) assert(ctx->base.saved_velem_state != INVALID_PTR); assert(ctx->base.saved_vs != INVALID_PTR); assert(!ctx->has_geometry_shader || ctx->base.saved_gs != INVALID_PTR); + assert(!ctx->has_tessellation || ctx->base.saved_tcs != INVALID_PTR); + assert(!ctx->has_tessellation || ctx->base.saved_tes != INVALID_PTR); assert(!ctx->has_stream_out || ctx->base.saved_num_so_targets != ~0); assert(ctx->base.saved_rs_state != INVALID_PTR); } @@ -538,6 +546,13 @@ static void blitter_restore_vertex_states(struct blitter_context_priv *ctx) ctx->base.saved_gs = INVALID_PTR; } + if (ctx->has_tessellation) { + pipe->bind_tcs_state(pipe, ctx->base.saved_tcs); + pipe->bind_tes_state(pipe, ctx->base.saved_tes); + ctx->base.saved_tcs = INVALID_PTR; + ctx->base.saved_tes = INVALID_PTR; + } + /* Stream outputs. 
*/ if (ctx->has_stream_out) { unsigned offsets[PIPE_MAX_SO_BUFFERS]; @@ -1108,6 +1123,10 @@ static void blitter_set_common_draw_rect_state(struct blitter_context_priv *ctx, if (ctx->has_geometry_shader) pipe->bind_gs_state(pipe, NULL); + if (ctx->has_tessellation) { + pipe->bind_tcs_state(pipe, NULL); + pipe->bind_tes_state(pipe, NULL); + } if (ctx->has_stream_out) pipe->set_stream_output_targets(pipe, 0, NULL, NULL); } @@ -1967,6 +1986,10 @@ void util_blitter_copy_buffer(struct blitter_context *blitter, bind_vs_pos_only(ctx); if (ctx->has_geometry_shader) pipe->bind_gs_state(pipe, NULL); + if (ctx->has_tessellation) { + pipe->bind_tcs_state(pipe, NULL); + pipe->bind_tes_state(pipe, NULL); + } pipe->bind_rasterizer_state(pipe, ctx->rs_discard_state); so_target = pipe->create_stream_output_target(pipe, dst, dstx, size); @@ -2027,6 +2050,10 @@ void util_blitter_clear_buffer(struct blitter_context *blitter, bind_vs_pos_only(ctx); if (ctx->has_geometry_shader) pipe->bind_gs_state(pipe, NULL); + if (ctx->has_tessellation) { + pipe->bind_tcs_state(pipe, NULL); + pipe->bind_tes_state(pipe, NULL); + } pipe->bind_rasterizer_state(pipe, ctx->rs_discard_state); so_target = pipe->create_stream_output_target(pipe, dst, offset, size); diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 1568030acfa..93b0e513bd0 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -102,7 +102,7 @@ struct blitter_context void *saved_dsa_state; /**< depth stencil alpha state */ void *saved_velem_state; /**< vertex elements state */ void *saved_rs_state; /**< rasterizer state */ - void *saved_fs, *saved_vs, *saved_gs; /**< shaders */ + void *saved_fs, *saved_vs, *saved_gs, *saved_tcs, *saved_tes; /**< shaders */ struct pipe_framebuffer_state saved_fb_state; /**< framebuffer state */ struct pipe_stencil_ref saved_stencil_ref; /**< stencil ref */ @@ -427,6 +427,20 @@ void 
util_blitter_save_geometry_shader(struct blitter_context *blitter, blitter->saved_gs = gs; } +static INLINE void +util_blitter_save_tessctrl_shader(struct blitter_context *blitter, + void *sh) +{ + blitter->saved_tcs = sh; +} + +static INLINE void +util_blitter_save_tesseval_shader(struct blitter_context *blitter, + void *sh) +{ + blitter->saved_tes = sh; +} + static INLINE void util_blitter_save_framebuffer(struct blitter_context *blitter, const struct pipe_framebuffer_state *state) From 2a7da1bddbee2be09ae6c2276a04c658807720b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 2 Mar 2015 16:32:25 +0100 Subject: [PATCH 172/834] gallium/cso: add support for tessellation shaders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/cso_cache/cso_context.c | 93 +++++++++++++++++++ src/gallium/auxiliary/cso_cache/cso_context.h | 12 +++ 2 files changed, 105 insertions(+) diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 31ffa7d9c0b..b3529099d65 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -82,6 +82,7 @@ struct cso_context { struct u_vbuf *vbuf; boolean has_geometry_shader; + boolean has_tessellation; boolean has_streamout; struct sampler_info samplers[PIPE_SHADER_TYPES]; @@ -108,6 +109,8 @@ struct cso_context { void *fragment_shader, *fragment_shader_saved; void *vertex_shader, *vertex_shader_saved; void *geometry_shader, *geometry_shader_saved; + void *tessctrl_shader, *tessctrl_shader_saved; + void *tesseval_shader, *tesseval_shader_saved; void *velements, *velements_saved; struct pipe_query *render_condition, *render_condition_saved; uint render_condition_mode, render_condition_mode_saved; @@ -273,6 +276,10 @@ struct cso_context *cso_create_context( struct pipe_context *pipe ) 
PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) { ctx->has_geometry_shader = TRUE; } + if (pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_TESS_CTRL, + PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) { + ctx->has_tessellation = TRUE; + } if (pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) { ctx->has_streamout = TRUE; @@ -812,6 +819,92 @@ void cso_restore_geometry_shader(struct cso_context *ctx) ctx->geometry_shader_saved = NULL; } +void cso_set_tessctrl_shader_handle(struct cso_context *ctx, void *handle) +{ + assert(ctx->has_tessellation || !handle); + + if (ctx->has_tessellation && ctx->tessctrl_shader != handle) { + ctx->tessctrl_shader = handle; + ctx->pipe->bind_tcs_state(ctx->pipe, handle); + } +} + +void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle) +{ + if (handle == ctx->tessctrl_shader) { + /* unbind before deleting */ + ctx->pipe->bind_tcs_state(ctx->pipe, NULL); + ctx->tessctrl_shader = NULL; + } + ctx->pipe->delete_tcs_state(ctx->pipe, handle); +} + +void cso_save_tessctrl_shader(struct cso_context *ctx) +{ + if (!ctx->has_tessellation) { + return; + } + + assert(!ctx->tessctrl_shader_saved); + ctx->tessctrl_shader_saved = ctx->tessctrl_shader; +} + +void cso_restore_tessctrl_shader(struct cso_context *ctx) +{ + if (!ctx->has_tessellation) { + return; + } + + if (ctx->tessctrl_shader_saved != ctx->tessctrl_shader) { + ctx->pipe->bind_tcs_state(ctx->pipe, ctx->tessctrl_shader_saved); + ctx->tessctrl_shader = ctx->tessctrl_shader_saved; + } + ctx->tessctrl_shader_saved = NULL; +} + +void cso_set_tesseval_shader_handle(struct cso_context *ctx, void *handle) +{ + assert(ctx->has_tessellation || !handle); + + if (ctx->has_tessellation && ctx->tesseval_shader != handle) { + ctx->tesseval_shader = handle; + ctx->pipe->bind_tes_state(ctx->pipe, handle); + } +} + +void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle) +{ + if (handle == ctx->tesseval_shader) { + /* unbind before deleting */ + 
ctx->pipe->bind_tes_state(ctx->pipe, NULL); + ctx->tesseval_shader = NULL; + } + ctx->pipe->delete_tes_state(ctx->pipe, handle); +} + +void cso_save_tesseval_shader(struct cso_context *ctx) +{ + if (!ctx->has_tessellation) { + return; + } + + assert(!ctx->tesseval_shader_saved); + ctx->tesseval_shader_saved = ctx->tesseval_shader; +} + +void cso_restore_tesseval_shader(struct cso_context *ctx) +{ + if (!ctx->has_tessellation) { + return; + } + + if (ctx->tesseval_shader_saved != ctx->tesseval_shader) { + ctx->pipe->bind_tes_state(ctx->pipe, ctx->tesseval_shader_saved); + ctx->tesseval_shader = ctx->tesseval_shader_saved; + } + ctx->tesseval_shader_saved = NULL; +} + /* clip state */ static INLINE void diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index aa56c589dad..cc50b60c6cd 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -141,6 +141,18 @@ void cso_save_geometry_shader(struct cso_context *cso); void cso_restore_geometry_shader(struct cso_context *cso); +void cso_set_tessctrl_shader_handle(struct cso_context *ctx, void *handle); +void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle); +void cso_save_tessctrl_shader(struct cso_context *cso); +void cso_restore_tessctrl_shader(struct cso_context *cso); + + +void cso_set_tesseval_shader_handle(struct cso_context *ctx, void *handle); +void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle); +void cso_save_tesseval_shader(struct cso_context *cso); +void cso_restore_tesseval_shader(struct cso_context *cso); + + void cso_set_framebuffer(struct cso_context *cso, const struct pipe_framebuffer_state *fb); void cso_save_framebuffer(struct cso_context *cso); From ed1b273ffcab0e2089899f3be7e31b2bc49f7ef3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 23 Sep 2014 15:54:02 +0200 Subject: [PATCH 173/834] gallium/cso: set NULL shaders at context 
destruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/cso_cache/cso_context.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index b3529099d65..59bad2cb2d7 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -327,6 +327,13 @@ void cso_destroy_context( struct cso_context *ctx ) ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL ); ctx->pipe->bind_fs_state( ctx->pipe, NULL ); ctx->pipe->bind_vs_state( ctx->pipe, NULL ); + if (ctx->has_geometry_shader) { + ctx->pipe->bind_gs_state(ctx->pipe, NULL); + } + if (ctx->has_tessellation) { + ctx->pipe->bind_tcs_state(ctx->pipe, NULL); + ctx->pipe->bind_tes_state(ctx->pipe, NULL); + } ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL ); if (ctx->has_streamout) From 2420ee497a14ca36ea05b275ea74e5c3a4432a59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 14 Oct 2014 11:06:48 +0200 Subject: [PATCH 174/834] gallium: disable tessellation shaders for meta ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/hud/hud_context.c | 6 ++++++ src/gallium/auxiliary/postprocess/pp_run.c | 6 ++++++ src/gallium/auxiliary/util/u_blit.c | 6 ++++++ src/mesa/state_tracker/st_cb_bitmap.c | 8 +++++++- src/mesa/state_tracker/st_cb_clear.c | 6 ++++++ src/mesa/state_tracker/st_cb_drawpixels.c | 8 +++++++- src/mesa/state_tracker/st_cb_drawtex.c | 6 ++++++ 7 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c index 00ec20589c4..6a124f7d716 100644 --- a/src/gallium/auxiliary/hud/hud_context.c +++ 
b/src/gallium/auxiliary/hud/hud_context.c @@ -423,6 +423,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex) cso_save_viewport(cso); cso_save_stream_outputs(cso); cso_save_geometry_shader(cso); + cso_save_tessctrl_shader(cso); + cso_save_tesseval_shader(cso); cso_save_vertex_shader(cso); cso_save_vertex_elements(cso); cso_save_aux_vertex_buffer_slot(cso); @@ -456,6 +458,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex) cso_set_rasterizer(cso, &hud->rasterizer); cso_set_viewport(cso, &viewport); cso_set_stream_outputs(cso, 0, NULL, NULL); + cso_set_tessctrl_shader_handle(cso, NULL); + cso_set_tesseval_shader_handle(cso, NULL); cso_set_geometry_shader_handle(cso, NULL); cso_set_vertex_shader_handle(cso, hud->vs); cso_set_vertex_elements(cso, 2, hud->velems); @@ -548,6 +552,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex) cso_restore_rasterizer(cso); cso_restore_viewport(cso); cso_restore_stream_outputs(cso); + cso_restore_tessctrl_shader(cso); + cso_restore_tesseval_shader(cso); cso_restore_geometry_shader(cso); cso_restore_vertex_shader(cso); cso_restore_vertex_elements(cso); diff --git a/src/gallium/auxiliary/postprocess/pp_run.c b/src/gallium/auxiliary/postprocess/pp_run.c index 06281c8ce34..e76ce854442 100644 --- a/src/gallium/auxiliary/postprocess/pp_run.c +++ b/src/gallium/auxiliary/postprocess/pp_run.c @@ -119,6 +119,8 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in, cso_save_depth_stencil_alpha(cso); cso_save_fragment_shader(cso); cso_save_framebuffer(cso); + cso_save_tessctrl_shader(cso); + cso_save_tesseval_shader(cso); cso_save_geometry_shader(cso); cso_save_rasterizer(cso); cso_save_sample_mask(cso); @@ -139,6 +141,8 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in, cso_set_sample_mask(cso, ~0); cso_set_min_samples(cso, 1); cso_set_stream_outputs(cso, 0, NULL, NULL); + cso_set_tessctrl_shader_handle(cso, NULL); + cso_set_tesseval_shader_handle(cso, NULL); 
cso_set_geometry_shader_handle(cso, NULL); cso_set_render_condition(cso, NULL, FALSE, 0); @@ -186,6 +190,8 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in, cso_restore_depth_stencil_alpha(cso); cso_restore_fragment_shader(cso); cso_restore_framebuffer(cso); + cso_restore_tessctrl_shader(cso); + cso_restore_tesseval_shader(cso); cso_restore_geometry_shader(cso); cso_restore_rasterizer(cso); cso_restore_sample_mask(cso); diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 90408ffdcc6..3f3b5fe63e4 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -535,6 +535,8 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_save_framebuffer(ctx->cso); cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); + cso_save_tessctrl_shader(ctx->cso); + cso_save_tesseval_shader(ctx->cso); cso_save_geometry_shader(ctx->cso); cso_save_vertex_elements(ctx->cso); cso_save_aux_vertex_buffer_slot(ctx->cso); @@ -571,6 +573,8 @@ util_blit_pixels_tex(struct blit_state *ctx, set_fragment_shader(ctx, TGSI_WRITEMASK_XYZW, src_sampler_view->texture->target); set_vertex_shader(ctx); + cso_set_tessctrl_shader_handle(ctx->cso, NULL); + cso_set_tesseval_shader_handle(ctx->cso, NULL); cso_set_geometry_shader_handle(ctx->cso, NULL); /* drawing dest */ @@ -611,6 +615,8 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_restore_framebuffer(ctx->cso); cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); + cso_restore_tessctrl_shader(ctx->cso); + cso_restore_tesseval_shader(ctx->cso); cso_restore_geometry_shader(ctx->cso); cso_restore_vertex_elements(ctx->cso); cso_restore_aux_vertex_buffer_slot(ctx->cso); diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 2107ab16739..c881e194f70 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -452,6 +452,8 @@ draw_bitmap_quad(struct gl_context *ctx, 
GLint x, GLint y, GLfloat z, cso_save_fragment_shader(cso); cso_save_stream_outputs(cso); cso_save_vertex_shader(cso); + cso_save_tessctrl_shader(cso); + cso_save_tesseval_shader(cso); cso_save_geometry_shader(cso); cso_save_vertex_elements(cso); cso_save_aux_vertex_buffer_slot(cso); @@ -466,7 +468,9 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, /* vertex shader state: position + texcoord pass-through */ cso_set_vertex_shader_handle(cso, st->bitmap.vs); - /* geometry shader state: disabled */ + /* disable other shaders */ + cso_set_tessctrl_shader_handle(cso, NULL); + cso_set_tesseval_shader_handle(cso, NULL); cso_set_geometry_shader_handle(cso, NULL); /* user samplers, plus our bitmap sampler */ @@ -536,6 +540,8 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_restore_viewport(cso); cso_restore_fragment_shader(cso); cso_restore_vertex_shader(cso); + cso_restore_tessctrl_shader(cso); + cso_restore_tesseval_shader(cso); cso_restore_geometry_shader(cso); cso_restore_vertex_elements(cso); cso_restore_aux_vertex_buffer_slot(cso); diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index f10e9063ac7..137fac8a9a9 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -265,6 +265,8 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) cso_save_fragment_shader(st->cso_context); cso_save_stream_outputs(st->cso_context); cso_save_vertex_shader(st->cso_context); + cso_save_tessctrl_shader(st->cso_context); + cso_save_tesseval_shader(st->cso_context); cso_save_geometry_shader(st->cso_context); cso_save_vertex_elements(st->cso_context); cso_save_aux_vertex_buffer_slot(st->cso_context); @@ -347,6 +349,8 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) } set_fragment_shader(st); + cso_set_tessctrl_shader_handle(st->cso_context, NULL); + cso_set_tesseval_shader_handle(st->cso_context, NULL); if (num_layers > 1) 
set_vertex_shader_layered(st); @@ -371,6 +375,8 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) cso_restore_viewport(st->cso_context); cso_restore_fragment_shader(st->cso_context); cso_restore_vertex_shader(st->cso_context); + cso_restore_tessctrl_shader(st->cso_context); + cso_restore_tesseval_shader(st->cso_context); cso_restore_geometry_shader(st->cso_context); cso_restore_vertex_elements(st->cso_context); cso_restore_aux_vertex_buffer_slot(st->cso_context); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 3edf31bad52..a6a98c83aa6 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -693,6 +693,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_save_fragment_shader(cso); cso_save_stream_outputs(cso); cso_save_vertex_shader(cso); + cso_save_tessctrl_shader(cso); + cso_save_tesseval_shader(cso); cso_save_geometry_shader(cso); cso_save_vertex_elements(cso); cso_save_aux_vertex_buffer_slot(cso); @@ -746,7 +748,9 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, /* vertex shader state: position + texcoord pass-through */ cso_set_vertex_shader_handle(cso, driver_vp); - /* geometry shader state: disabled */ + /* disable other shaders */ + cso_set_tessctrl_shader_handle(cso, NULL); + cso_set_tesseval_shader_handle(cso, NULL); cso_set_geometry_shader_handle(cso, NULL); /* texture sampling state: */ @@ -816,6 +820,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT); cso_restore_fragment_shader(cso); cso_restore_vertex_shader(cso); + cso_restore_tessctrl_shader(cso); + cso_restore_tesseval_shader(cso); cso_restore_geometry_shader(cso); cso_restore_vertex_elements(cso); cso_restore_aux_vertex_buffer_slot(cso); diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c index 
1420b96e55a..2af4f6d4cf6 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.c +++ b/src/mesa/state_tracker/st_cb_drawtex.c @@ -229,6 +229,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, cso_save_viewport(cso); cso_save_stream_outputs(cso); cso_save_vertex_shader(cso); + cso_save_tessctrl_shader(cso); + cso_save_tesseval_shader(cso); cso_save_geometry_shader(cso); cso_save_vertex_elements(cso); cso_save_aux_vertex_buffer_slot(cso); @@ -238,6 +240,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, semantic_names, semantic_indexes); cso_set_vertex_shader_handle(cso, vs); } + cso_set_tessctrl_shader_handle(cso, NULL); + cso_set_tesseval_shader_handle(cso, NULL); cso_set_geometry_shader_handle(cso, NULL); for (i = 0; i < numAttribs; i++) { @@ -279,6 +283,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, /* restore state */ cso_restore_viewport(cso); cso_restore_vertex_shader(cso); + cso_restore_tessctrl_shader(cso); + cso_restore_tesseval_shader(cso); cso_restore_geometry_shader(cso); cso_restore_vertex_elements(cso); cso_restore_aux_vertex_buffer_slot(cso); From 5b45cbe7e2bcf1709ab1fcc50dfc877b3e43bcae Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 12 Jul 2014 21:37:45 -0400 Subject: [PATCH 175/834] tgsi/scan: allow scanning tessellation shaders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilia Mirkin Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index e6011d2d85a..3f94bab4496 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -74,6 +74,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, assert(procType == TGSI_PROCESSOR_FRAGMENT || procType == TGSI_PROCESSOR_VERTEX || procType == 
TGSI_PROCESSOR_GEOMETRY || + procType == TGSI_PROCESSOR_TESSCTRL || + procType == TGSI_PROCESSOR_TESSEVAL || procType == TGSI_PROCESSOR_COMPUTE); info->processor = procType; @@ -236,7 +238,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->num_outputs++; if (procType == TGSI_PROCESSOR_VERTEX || - procType == TGSI_PROCESSOR_GEOMETRY) { + procType == TGSI_PROCESSOR_GEOMETRY || + procType == TGSI_PROCESSOR_TESSCTRL || + procType == TGSI_PROCESSOR_TESSEVAL) { if (semName == TGSI_SEMANTIC_CLIPDIST) { info->num_written_clipdistance += util_bitcount(fulldecl->Declaration.UsageMask); From 93c940736f03b3eaf53d3a5703963b87a506747e Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 12 Jul 2014 20:06:03 -0400 Subject: [PATCH 176/834] tgsi/sanity: set implicit in/out array sizes based on patch sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilia Mirkin Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 36 +++++++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index fbfe652e425..2ac74fb15d5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -58,6 +58,7 @@ struct sanity_check_ctx uint errors; uint warnings; uint implied_array_size; + uint implied_out_array_size; boolean print; }; @@ -406,16 +407,30 @@ iter_declaration( if (!check_file_name( ctx, file )) return TRUE; for (i = decl->Range.First; i <= decl->Range.Last; i++) { - /* declared TGSI_FILE_INPUT's for geometry processor + /* declared TGSI_FILE_INPUT's for geometry and tessellation * have an implied second dimension */ - if (file == TGSI_FILE_INPUT && - ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) { + uint processor = ctx->iter.processor.Processor; + uint patch = decl->Semantic.Name == TGSI_SEMANTIC_PATCH || + 
decl->Semantic.Name == TGSI_SEMANTIC_TESSOUTER || + decl->Semantic.Name == TGSI_SEMANTIC_TESSINNER; + if (file == TGSI_FILE_INPUT && !patch && ( + processor == TGSI_PROCESSOR_GEOMETRY || + processor == TGSI_PROCESSOR_TESSCTRL || + processor == TGSI_PROCESSOR_TESSEVAL)) { uint vert; for (vert = 0; vert < ctx->implied_array_size; ++vert) { scan_register *reg = MALLOC(sizeof(scan_register)); fill_scan_register2d(reg, file, i, vert); check_and_declare(ctx, reg); } + } else if (file == TGSI_FILE_OUTPUT && !patch && + processor == TGSI_PROCESSOR_TESSCTRL) { + uint vert; + for (vert = 0; vert < ctx->implied_out_array_size; ++vert) { + scan_register *reg = MALLOC(sizeof(scan_register)); + fill_scan_register2d(reg, file, i, vert); + check_and_declare(ctx, reg); + } } else { scan_register *reg = MALLOC(sizeof(scan_register)); if (decl->Declaration.Dimension) { @@ -474,6 +489,19 @@ iter_property( prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) { ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data); } + if (iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL && + prop->Property.PropertyName == TGSI_PROPERTY_TCS_VERTICES_OUT) + ctx->implied_out_array_size = prop->u[0].Data; + return TRUE; +} + +static boolean +prolog(struct tgsi_iterate_context *iter) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + if (iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL || + iter->processor.Processor == TGSI_PROCESSOR_TESSEVAL) + ctx->implied_array_size = 32; return TRUE; } @@ -532,7 +560,7 @@ tgsi_sanity_check( { struct sanity_check_ctx ctx; - ctx.iter.prolog = NULL; + ctx.iter.prolog = prolog; ctx.iter.iterate_instruction = iter_instruction; ctx.iter.iterate_declaration = iter_declaration; ctx.iter.iterate_immediate = iter_immediate; From ec67d73a73f74011b33f6ab59bed4c88ebaa7497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 19 Mar 2015 23:31:41 +0100 Subject: [PATCH 177/834] tgsi/ureg: use correct limit for max 
input count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index a4c0fc50c6c..55d8cf1eeb0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -73,7 +73,7 @@ struct ureg_tokens { unsigned count; }; -#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_INPUT PIPE_MAX_SHADER_INPUTS #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS #define UREG_MAX_OUTPUT PIPE_MAX_SHADER_OUTPUTS #define UREG_MAX_CONSTANT_RANGE 32 From dfc3bced2ceebd1e3abacd07acd83f932b45c639 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 21 Jul 2014 21:18:14 -0400 Subject: [PATCH 178/834] tgsi/ureg: allow ureg_dst to have dimension indices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilia Mirkin Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 24 ++++++++++- src/gallium/auxiliary/tgsi/tgsi_ureg.h | 59 +++++++++++++++++++++++--- 2 files changed, 75 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 55d8cf1eeb0..7a8bf5404e3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -914,8 +914,8 @@ void ureg_emit_dst( struct ureg_program *ureg, struct ureg_dst dst ) { - unsigned size = (1 + - (dst.Indirect ? 1 : 0)); + unsigned size = 1 + (dst.Indirect ? 1 : 0) + + (dst.Dimension ? (dst.DimIndirect ? 
2 : 1) : 0); union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; @@ -944,6 +944,26 @@ ureg_emit_dst( struct ureg_program *ureg, n++; } + if (dst.Dimension) { + out[0].dst.Dimension = 1; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; + if (dst.DimIndirect) { + out[n].dim.Indirect = 1; + out[n].dim.Index = dst.DimensionIndex; + n++; + out[n].value = 0; + out[n].ind.File = dst.DimIndFile; + out[n].ind.Swizzle = dst.DimIndSwizzle; + out[n].ind.Index = dst.DimIndIndex; + out[n].ind.ArrayID = dst.ArrayID; + } else { + out[n].dim.Indirect = 0; + out[n].dim.Index = dst.DimensionIndex; + } + n++; + } + assert(n == size); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 8a2ed0a61d7..c3f4012f2b9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -75,6 +75,8 @@ struct ureg_dst unsigned File : 4; /* TGSI_FILE_ */ unsigned WriteMask : 4; /* TGSI_WRITEMASK_ */ unsigned Indirect : 1; /* BOOL */ + unsigned DimIndirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ unsigned Saturate : 1; /* BOOL */ unsigned Predicate : 1; unsigned PredNegate : 1; /* BOOL */ @@ -86,6 +88,10 @@ struct ureg_dst int IndirectIndex : 16; /* SINT */ unsigned IndirectFile : 4; /* TGSI_FILE_ */ int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ + unsigned DimIndFile : 4; /* TGSI_FILE_ */ + unsigned DimIndSwizzle : 2; /* TGSI_SWIZZLE_ */ + int DimensionIndex : 16; /* SINT */ + int DimIndIndex : 16; /* SINT */ unsigned ArrayID : 10; /* UINT */ }; @@ -1108,6 +1114,16 @@ ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) return reg; } +static INLINE struct ureg_dst +ureg_dst_dimension( struct ureg_dst reg, int index ) +{ + assert(reg.File != TGSI_FILE_NULL); + reg.Dimension = 1; + reg.DimIndirect = 0; + reg.DimensionIndex = index; + return reg; +} + static INLINE struct ureg_src ureg_src_dimension( struct ureg_src reg, int index ) { @@ -1118,6 +1134,19 @@ 
ureg_src_dimension( struct ureg_src reg, int index ) return reg; } +static INLINE struct ureg_dst +ureg_dst_dimension_indirect( struct ureg_dst reg, struct ureg_src addr, + int index ) +{ + assert(reg.File != TGSI_FILE_NULL); + reg.Dimension = 1; + reg.DimIndirect = 1; + reg.DimensionIndex = index; + reg.DimIndFile = addr.File; + reg.DimIndIndex = addr.Index; + reg.DimIndSwizzle = addr.SwizzleX; + return reg; +} static INLINE struct ureg_src ureg_src_dimension_indirect( struct ureg_src reg, struct ureg_src addr, @@ -1161,6 +1190,12 @@ ureg_dst_register( unsigned file, dst.PredSwizzleZ = TGSI_SWIZZLE_Z; dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = index; + dst.Dimension = 0; + dst.DimensionIndex = 0; + dst.DimIndirect = 0; + dst.DimIndFile = TGSI_FILE_NULL; + dst.DimIndIndex = 0; + dst.DimIndSwizzle = 0; dst.ArrayID = 0; return dst; @@ -1189,6 +1224,12 @@ ureg_dst( struct ureg_src src ) dst.PredSwizzleZ = TGSI_SWIZZLE_Z; dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = src.Index; + dst.Dimension = src.Dimension; + dst.DimensionIndex = src.DimensionIndex; + dst.DimIndirect = src.DimIndirect; + dst.DimIndFile = src.DimIndFile; + dst.DimIndIndex = src.DimIndIndex; + dst.DimIndSwizzle = src.DimIndSwizzle; dst.ArrayID = src.ArrayID; return dst; @@ -1240,12 +1281,12 @@ ureg_src( struct ureg_dst dst ) src.Absolute = 0; src.Index = dst.Index; src.Negate = 0; - src.Dimension = 0; - src.DimensionIndex = 0; - src.DimIndirect = 0; - src.DimIndFile = TGSI_FILE_NULL; - src.DimIndIndex = 0; - src.DimIndSwizzle = 0; + src.Dimension = dst.Dimension; + src.DimensionIndex = dst.DimensionIndex; + src.DimIndirect = dst.DimIndirect; + src.DimIndFile = dst.DimIndFile; + src.DimIndIndex = dst.DimIndIndex; + src.DimIndSwizzle = dst.DimIndSwizzle; src.ArrayID = dst.ArrayID; return src; @@ -1272,6 +1313,12 @@ ureg_dst_undef( void ) dst.PredSwizzleZ = TGSI_SWIZZLE_Z; dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = 0; + dst.Dimension = 0; + dst.DimensionIndex = 0; + dst.DimIndirect = 0; + 
dst.DimIndFile = TGSI_FILE_NULL; + dst.DimIndIndex = 0; + dst.DimIndSwizzle = 0; dst.ArrayID = 0; return dst; From d7081828cc62df7efbc23ca1037a42d69dab94a7 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 13 Jul 2014 14:26:05 -0400 Subject: [PATCH 179/834] tgsi/dump: fix declaration printing of tessellation inputs/outputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mareko: only output second dimension for non-patch semantics Signed-off-by: Ilia Mirkin Reviewed-by: Roland Scheidegger Signed-off-by: Marek Olšák --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 13d67691897..27d410853bf 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -271,14 +271,30 @@ iter_declaration( struct tgsi_full_declaration *decl ) { struct dump_ctx *ctx = (struct dump_ctx *)iter; + boolean patch = decl->Semantic.Name == TGSI_SEMANTIC_PATCH || + decl->Semantic.Name == TGSI_SEMANTIC_TESSINNER || + decl->Semantic.Name == TGSI_SEMANTIC_TESSOUTER || + decl->Semantic.Name == TGSI_SEMANTIC_PRIMID; TXT( "DCL " ); TXT(tgsi_file_name(decl->Declaration.File)); - /* all geometry shader inputs are two dimensional */ + /* all geometry shader inputs and non-patch tessellation shader inputs are + * two dimensional + */ if (decl->Declaration.File == TGSI_FILE_INPUT && - iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY) { + (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY || + (!patch && + (iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL || + iter->processor.Processor == TGSI_PROCESSOR_TESSEVAL)))) { + TXT("[]"); + } + + /* all non-patch tess ctrl shader outputs are two dimensional */ + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + !patch && + iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL) { TXT("[]"); } From 
5a55f681f6208122cd4921b283cffd54ea335128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Sat, 16 May 2015 19:43:39 +0200 Subject: [PATCH 180/834] mesa: Check the lookup_framebuffer return value in NamedFramebufferRenderbuffer Found by Coverity. Reported-by: Ilia Mirkin Reviewed-by: Ilia Mirkin --- src/mesa/main/fbobject.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 8db651ca2a1..1859c277293 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -3362,6 +3362,8 @@ _mesa_NamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment, fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, "glNamedFramebufferRenderbuffer"); + if (!fb) + return; if (renderbuffertarget != GL_RENDERBUFFER) { _mesa_error(ctx, GL_INVALID_ENUM, From 845ad2667ab2466752f06ea30bdb9c837116c308 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 10 Feb 2015 16:40:48 +0100 Subject: [PATCH 181/834] i965: Fix textureSize for Lod > 0 with non-mipmap filters Currently, when the MinFilter is GL_LINEAR or GL_NEAREST we hide the actual miplevel count from the hardware (and we avoid re-creating the miptree structure with all the levels), since we don't expect levels other than the base level to be needed. Unfortunately, GLSL's textureSize() function is an exception to this rule. This function takes a lod parameter that we need to use to return the size of the appropriate miplevel (if it exists). The spec only requires that the miplevel exists, so even if the sampler is configured with a linear or nearest MinFilter, as far as the user has uploaded miplevels for the texture, textureSize() should return the appropriate sizes. 
This patch fixes this by exposing the actual miplevel count for all sampling engine textures while keeping the original implementation for render targets (for render target textures we do not provide the miplevel count but the actual LOD we are writing to, so we want to make sure that we make this the base level). Fixes 28 dEQP tests in the following category: dEQP-GLES3.functional.shaders.texture_functions.texturesize.* Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/intel_tex_validate.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c index 1d827683b99..c581e145f64 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -47,8 +47,10 @@ intel_update_max_level(struct intel_texture_object *intelObj, { struct gl_texture_object *tObj = &intelObj->base; - if (sampler->MinFilter == GL_NEAREST || - sampler->MinFilter == GL_LINEAR) { + if (!tObj->_MipmapComplete || + (tObj->_RenderToTexture && + (sampler->MinFilter == GL_NEAREST || + sampler->MinFilter == GL_LINEAR))) { intelObj->_MaxLevel = tObj->BaseLevel; } else { intelObj->_MaxLevel = tObj->_MaxLevel; From 9f4eaba36f002f4ea86bc11fd3d0f8dce485b9d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Mon, 11 May 2015 13:24:20 +0300 Subject: [PATCH 182/834] glsl: add stage references for UBO uniforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch marks uniforms inside UBO properly referenced by stages. 
Signed-off-by: Tapani Pälli Reviewed-by: Samuel Iglesias Gonsalvez Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90397 --- src/glsl/linker.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index ea73c6f9da1..ecdc025710f 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2700,6 +2700,16 @@ build_program_resource_list(struct gl_context *ctx, uint8_t stageref = build_stageref(shProg, shProg->UniformStorage[i].name); + + /* Add stagereferences for uniforms in a uniform block. */ + int block_index = shProg->UniformStorage[i].block_index; + if (block_index != -1) { + for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { + if (shProg->UniformBlockStageIndex[j][block_index] != -1) + stageref |= (1 << j); + } + } + if (!add_program_resource(shProg, GL_UNIFORM, &shProg->UniformStorage[i], stageref)) return; From ae405d429ff62e279cb4bb84d29581d4f7467b52 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 18 May 2015 12:57:31 -0400 Subject: [PATCH 183/834] gk110/ir: switch to gk104-style sched codes rather than all-in-one Matches change to envydis/envyas tools. 
Signed-off-by: Ilia Mirkin --- .../drivers/nouveau/codegen/lib/gk110.asm | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/lib/gk110.asm b/src/gallium/drivers/nouveau/codegen/lib/gk110.asm index be17871edd4..b9c05a04b9a 100644 --- a/src/gallium/drivers/nouveau/codegen/lib/gk110.asm +++ b/src/gallium/drivers/nouveau/codegen/lib/gk110.asm @@ -11,7 +11,7 @@ // SIZE: 22 / 14 * 8 bytes // gk110_div_u32: - sched 0x28282804280428 + sched 0x28 0x04 0x28 0x04 0x28 0x28 0x28 bfind u32 $r2 $r1 xor b32 $r2 $r2 0x1f mov b32 $r3 0x1 @@ -19,7 +19,7 @@ gk110_div_u32: cvt u32 $r1 neg u32 $r1 mul $r3 u32 $r1 u32 $r2 add $r2 (mul high u32 $r2 u32 $r3) $r2 - sched 0x28282828282828 + sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28 mul $r3 u32 $r1 u32 $r2 add $r2 (mul high u32 $r2 u32 $r3) $r2 mul $r3 u32 $r1 u32 $r2 @@ -27,7 +27,7 @@ gk110_div_u32: mul $r3 u32 $r1 u32 $r2 add $r2 (mul high u32 $r2 u32 $r3) $r2 mul $r3 u32 $r1 u32 $r2 - sched 0x042c2828042804 + sched 0x04 0x28 0x04 0x28 0x28 0x2c 0x04 add $r2 (mul high u32 $r2 u32 $r3) $r2 mov b32 $r3 $r0 mul high $r0 u32 $r0 u32 $r2 @@ -35,7 +35,7 @@ gk110_div_u32: add $r1 (mul u32 $r1 u32 $r0) $r3 set $p0 0x1 ge u32 $r1 $r2 $p0 sub b32 $r1 $r1 $r2 - sched 0x20282e20042c28 + sched 0x28 0x2c 0x04 0x20 0x2e 0x28 0x20 $p0 add b32 $r0 $r0 0x1 $p0 set $p0 0x1 ge u32 $r1 $r2 $p0 sub b32 $r1 $r1 $r2 @@ -51,7 +51,7 @@ gk110_div_u32: gk110_div_s32: set $p2 0x1 lt s32 $r0 0x0 set $p3 0x1 lt s32 $r1 0x0 xor $p2 - sched 0x28042804282820 + sched 0x20 0x28 0x28 0x04 0x28 0x04 0x28 cvt s32 $r0 abs s32 $r0 cvt s32 $r1 abs s32 $r1 bfind u32 $r2 $r1 @@ -59,7 +59,7 @@ gk110_div_s32: mov b32 $r3 0x1 shl b32 $r2 $r3 clamp $r2 cvt u32 $r1 neg u32 $r1 - sched 0x28282828282828 + sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28 mul $r3 u32 $r1 u32 $r2 add $r2 (mul high u32 $r2 u32 $r3) $r2 mul $r3 u32 $r1 u32 $r2 @@ -67,7 +67,7 @@ gk110_div_s32: mul $r3 u32 $r1 u32 $r2 add $r2 (mul high u32 $r2 u32 $r3) 
$r2 mul $r3 u32 $r1 u32 $r2 - sched 0x28280428042828 + sched 0x28 0x28 0x04 0x28 0x04 0x28 0x28 add $r2 (mul high u32 $r2 u32 $r3) $r2 mul $r3 u32 $r1 u32 $r2 add $r2 (mul high u32 $r2 u32 $r3) $r2 @@ -75,7 +75,7 @@ gk110_div_s32: mul high $r0 u32 $r0 u32 $r2 cvt u32 $r2 neg u32 $r1 add $r1 (mul u32 $r1 u32 $r0) $r3 - sched 0x2028042c28042c + sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20 set $p0 0x1 ge u32 $r1 $r2 $p0 sub b32 $r1 $r1 $r2 $p0 add b32 $r0 $r0 0x1 @@ -83,7 +83,7 @@ gk110_div_s32: $p0 sub b32 $r1 $r1 $r2 $p0 add b32 $r0 $r0 0x1 $p3 cvt s32 $r0 neg s32 $r0 - sched 0x2c200428042e04 + sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c $p2 cvt s32 $r1 neg s32 $r1 ret From 1e4e17fbd9296cc5064aabdb351a894d10190cb6 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 11 May 2015 09:29:56 -0700 Subject: [PATCH 184/834] i965/fs: Lower integer multiplication after optimizations. 32-bit x 32-bit integer multiplication requires multiple instructions until Broadwell. This patch just lets us treat the MUL instruction in the FS backend like it operates on Broadwell, and after optimizations we lower it into a sequence of instructions on older platforms. Doing this will allow us to do some extra optimization on integer multiplies. 
Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 66 ++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 1 + src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 36 +---------- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 31 +-------- 4 files changed, 70 insertions(+), 64 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b63ca23e3d8..cb13fcb1cc8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3523,6 +3523,71 @@ fs_visitor::lower_load_payload() return progress; } +bool +fs_visitor::lower_integer_multiplication() +{ + bool progress = false; + + /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit operation + * directly, but Cherryview cannot. + */ + if (devinfo->gen >= 8 && !devinfo->is_cherryview) + return false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + if (inst->opcode != BRW_OPCODE_MUL || + inst->dst.is_accumulator() || + (inst->dst.type != BRW_REGISTER_TYPE_D && + inst->dst.type != BRW_REGISTER_TYPE_UD)) + continue; + +#define insert(instr) inst->insert_before(block, instr) + + /* The MUL instruction isn't commutative. On Gen <= 6, only the low + * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of + * src1 are used. + * + * If multiplying by an immediate value that fits in 16-bits, do a + * single MUL instruction with that value in the proper location. 
+ */ + if (inst->src[1].file == IMM && + inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) { + if (devinfo->gen < 7) { + fs_reg imm(GRF, alloc.allocate(dispatch_width / 8), + inst->dst.type, dispatch_width); + insert(MOV(imm, inst->src[1])); + insert(MUL(inst->dst, imm, inst->src[0])); + } else { + insert(MUL(inst->dst, inst->src[0], inst->src[1])); + } + } else { + if (devinfo->gen >= 7) + no16("SIMD16 integer multiply unsupported\n"); + + const unsigned channels = dispatch_width; + const enum brw_reg_type type = inst->dst.type; + const fs_reg acc(retype(brw_acc_reg(channels), type)); + const fs_reg null(retype(brw_null_vec(channels), type)); + + const fs_reg &src0 = inst->src[0]; + const fs_reg &src1 = inst->src[1]; + + insert(MUL(acc, src0, src1)); + insert(MACH(null, src0, src1)); + insert(MOV(inst->dst, acc)); + } +#undef insert + + inst->remove(block); + progress = true; + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + void fs_visitor::dump_instructions() { @@ -4001,6 +4066,7 @@ fs_visitor::optimize() } OPT(opt_combine_constants); + OPT(lower_integer_multiplication); lower_uniform_pull_constant_loads(); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 991cff96325..f2aa0ae9576 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -241,6 +241,7 @@ public: void no16(const char *msg, ...); void lower_uniform_pull_constant_loads(); bool lower_load_payload(); + bool lower_integer_multiplication(); bool opt_combine_constants(); void emit_dummy_fs(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 9cfd0e792a2..5dd8363b91e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -780,41 +780,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; - case nir_op_imul: { - if (devinfo->gen >= 8) { - emit(MUL(result, op[0], 
op[1])); - break; - } else { - nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src); - nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src); - - if (value0 && value0->u[0] < (1 << 16)) { - if (devinfo->gen < 7) { - emit(MUL(result, op[0], op[1])); - } else { - emit(MUL(result, op[1], op[0])); - } - break; - } else if (value1 && value1->u[0] < (1 << 16)) { - if (devinfo->gen < 7) { - emit(MUL(result, op[1], op[0])); - } else { - emit(MUL(result, op[0], op[1])); - } - break; - } - } - - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(reg_null_d, op[0], op[1])); - emit(MOV(result, fs_reg(acc))); + case nir_op_imul: + emit(MUL(result, op[0], op[1])); break; - } case nir_op_imul_high: case nir_op_umul_high: { diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index abaea5f4e13..ead77686640 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -873,36 +873,7 @@ fs_visitor::visit(ir_expression *ir) unreachable("not reached: should be handled by ir_sub_to_add_neg"); case ir_binop_mul: - if (devinfo->gen < 8 && ir->type->is_integer()) { - /* For integer multiplication, the MUL uses the low 16 bits - * of one of the operands (src0 on gen6, src1 on gen7). The - * MACH accumulates in the contribution of the upper 16 bits - * of that operand. 
- */ - if (ir->operands[0]->is_uint16_constant()) { - if (devinfo->gen < 7) - emit(MUL(this->result, op[0], op[1])); - else - emit(MUL(this->result, op[1], op[0])); - } else if (ir->operands[1]->is_uint16_constant()) { - if (devinfo->gen < 7) - emit(MUL(this->result, op[1], op[0])); - else - emit(MUL(this->result, op[0], op[1])); - } else { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - this->result.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(reg_null_d, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); - } - } else { - emit(MUL(this->result, op[0], op[1])); - } + emit(MUL(this->result, op[0], op[1])); break; case ir_binop_imul_high: { if (devinfo->gen >= 7) From 81deefc45ba7b7d3b2b5e7ccf9e1680df6e31e3a Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 14 May 2015 22:23:22 -0700 Subject: [PATCH 185/834] i965/fs: Unrestrict constant propagation into integer multiply. Gen8+'s MUL instruction doesn't ignore the high 16-bits of one source like on earlier platforms, so we can constant propagate into it without worry. Integer multiplies (not into the accumulator, which is done for imul_high) are lowered in lower_integer_multiplication(), so it's safe there as well. 
On Broadwell, fragment shaders only: total instructions in shared programs: 4377769 -> 4377451 (-0.01%) instructions in affected programs: 48064 -> 47746 (-0.66%) helped: 156 On Broadwell, vertex shaders only: total instructions in shared programs: 2858885 -> 2856313 (-0.09%) instructions in affected programs: 26380 -> 23808 (-9.75%) helped: 134 On Broadwell, vertex shaders only (with INTEL_USE_NIR=1): total instructions in shared programs: 2911688 -> 2865984 (-1.57%) instructions in affected programs: 1421715 -> 1376011 (-3.21%) helped: 6186 Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 52bfa921dc3..c92aae4b1d6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -541,8 +541,16 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) /* Fit this constant in by commuting the operands. * Exception: we can't do this for 32-bit integer MUL/MACH * because it's asymmetric. + * + * The BSpec says for Broadwell that + * + * "When multiplying DW x DW, the dst cannot be accumulator." + * + * Integer MUL with a non-accumulator destination will be lowered + * by lower_integer_multiplication(), so don't restrict it. */ - if ((inst->opcode == BRW_OPCODE_MUL || + if (((inst->opcode == BRW_OPCODE_MUL && + inst->dst.is_accumulator()) || inst->opcode == BRW_OPCODE_MACH) && (inst->src[1].type == BRW_REGISTER_TYPE_D || inst->src[1].type == BRW_REGISTER_TYPE_UD)) From 0592ee457dbddd9483e4294097e5d7f2e1c39308 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 12 May 2015 15:51:05 -0700 Subject: [PATCH 186/834] i965/fs: Add set_sechalf() method. Used in the next commit. 
Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_ir_fs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 7ac7ff81d20..a79713ce201 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -261,4 +261,14 @@ public: bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */ }; +/** + * Set second-half quarter control on \p inst. + */ +static inline fs_inst * +set_sechalf(fs_inst *inst) +{ + inst->force_sechalf = true; + return inst; +} + #endif From 4ec09c77471e39e6ff81c99f1edde2e1713a7f24 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 12 May 2015 15:51:44 -0700 Subject: [PATCH 187/834] i965/fs: Support integer multiplication in SIMD16 on Haswell. Ivybridge (and presumably Baytrail) have a bug that prevents this from working. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 52 +++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index cb13fcb1cc8..01e3139229d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3561,10 +3561,42 @@ fs_visitor::lower_integer_multiplication() insert(MUL(inst->dst, inst->src[0], inst->src[1])); } } else { - if (devinfo->gen >= 7) + /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot + * do 32-bit integer multiplication in one instruction, but instead + * must do a sequence (which actually calculates a 64-bit result): + * + * mul(8) acc0<1>D g3<8,8,1>D g4<8,8,1>D + * mach(8) null g3<8,8,1>D g4<8,8,1>D + * mov(8) g2<1>D acc0<8,8,1>D + * + * But on Gen > 6, the ability to use second accumulator register + * (acc1) for non-float data types was removed, preventing a simple + * implementation in SIMD16. 
A 16-channel result can be calculated by + * executing the three instructions twice in SIMD8, once with quarter + * control of 1Q for the first eight channels and again with 2Q for + * the second eight channels. + * + * Which accumulator register is implicitly accessed (by AccWrEnable + * for instance) is determined by the quarter control. Unfortunately + * Ivybridge (and presumably Baytrail) has a hardware bug in which an + * implicit accumulator access by an instruction with 2Q will access + * acc1 regardless of whether the data type is usable in acc1. + * + * Specifically, the 2Q mach(8) writes acc1 which does not exist for + * integer data types. + */ + if (devinfo->gen == 7 && !devinfo->is_haswell) no16("SIMD16 integer multiply unsupported\n"); - const unsigned channels = dispatch_width; + /* From the IVB PRM, volume 4 part 3, page 42: + * + * "For any DWord operation, including DWord multiply, accumulator + * can store up to 8 channels of data, with only acc0 supported." + * + * So make the accumulator (and null register) only 8-channels wide on + * Gen7+. + */ + const unsigned channels = devinfo->gen >= 7 ? 
8 : dispatch_width; const enum brw_reg_type type = inst->dst.type; const fs_reg acc(retype(brw_acc_reg(channels), type)); const fs_reg null(retype(brw_null_vec(channels), type)); @@ -3572,9 +3604,19 @@ fs_visitor::lower_integer_multiplication() const fs_reg &src0 = inst->src[0]; const fs_reg &src1 = inst->src[1]; - insert(MUL(acc, src0, src1)); - insert(MACH(null, src0, src1)); - insert(MOV(inst->dst, acc)); + if (devinfo->gen >= 7 && dispatch_width == 16) { + insert(MUL(acc, half(src0, 0), half(src1, 0))); + insert(MACH(null, half(src0, 0), half(src1, 0))); + insert(MOV(half(inst->dst, 0), acc)); + + insert(set_sechalf(MUL(acc, half(src0, 1), half(src1, 1)))); + insert(set_sechalf(MACH(null, half(src0, 1), half(src1, 1)))); + insert(set_sechalf(MOV(half(inst->dst, 1), acc))); + } else { + insert(MUL(acc, src0, src1)); + insert(MACH(null, src0, src1)); + insert(MOV(inst->dst, acc)); + } } #undef insert From 0a9e3a0160bbda8ea23aeb049f9c3dfc0478bbf5 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 14 May 2015 15:58:20 -0700 Subject: [PATCH 188/834] i965/fs: Rework compression control selection. The next commit uses an add(16) with a UW destination with a stride of 2, which needs compression control since it's writing two registers. The old code would have failed to set compression control correctly. 
Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index b6b0d0523a0..0be0f866558 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1601,10 +1601,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case 16: case 32: - if (type_sz(inst->dst.type) < sizeof(float)) - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - else + /* If the instruction writes to more than one register, it needs to + * be a "compressed" instruction on Gen <= 5. + */ + if (inst->exec_size * inst->dst.stride * type_sz(inst->dst.type) > 32) brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); + else + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); break; default: unreachable("Invalid instruction width"); From f7df169ba13d22338e9276839a7e9629ca0a6b4f Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 13 May 2015 18:34:03 -0700 Subject: [PATCH 189/834] i965/fs: Implement integer multiply without mul/mach. Ivybridge and Baytrail can't use mach with 2Q quarter control, so just do it without the accumulator. Stupid accumulator. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 88 ++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 01e3139229d..9b3186bd3f8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3584,39 +3584,77 @@ fs_visitor::lower_integer_multiplication() * * Specifically, the 2Q mach(8) writes acc1 which does not exist for * integer data types. 
- */ - if (devinfo->gen == 7 && !devinfo->is_haswell) - no16("SIMD16 integer multiply unsupported\n"); - - /* From the IVB PRM, volume 4 part 3, page 42: * - * "For any DWord operation, including DWord multiply, accumulator - * can store up to 8 channels of data, with only acc0 supported." + * Since we only want the low 32-bits of the result, we can do two + * 32-bit x 16-bit multiplies (like the mul and mach are doing), and + * adjust the high result and add them (like the mach is doing): * - * So make the accumulator (and null register) only 8-channels wide on - * Gen7+. + * mul(8) g7<1>D g3<8,8,1>D g4.0<8,8,1>UW + * mul(8) g8<1>D g3<8,8,1>D g4.1<8,8,1>UW + * shl(8) g9<1>D g8<8,8,1>D 16D + * add(8) g2<1>D g7<8,8,1>D g9<8,8,1>D + * + * We avoid the shl instruction by realizing that we only want to add + * the low 16-bits of the "high" result to the high 16-bits of the + * "low" result and using proper regioning on the add: + * + * mul(8) g7<1>D g3<8,8,1>D g4.0<16,8,2>UW + * mul(8) g8<1>D g3<8,8,1>D g4.1<16,8,2>UW + * add(8) g7.1<2>UW g7.1<16,8,2>UW g8<16,8,2>UW + * + * Since it does not use the (single) accumulator register, we can + * schedule multi-component multiplications much better. */ - const unsigned channels = devinfo->gen >= 7 ? 
8 : dispatch_width; - const enum brw_reg_type type = inst->dst.type; - const fs_reg acc(retype(brw_acc_reg(channels), type)); - const fs_reg null(retype(brw_null_vec(channels), type)); - const fs_reg &src0 = inst->src[0]; - const fs_reg &src1 = inst->src[1]; + fs_reg low = inst->dst; + fs_reg high(GRF, alloc.allocate(dispatch_width / 8), + inst->dst.type, dispatch_width); - if (devinfo->gen >= 7 && dispatch_width == 16) { - insert(MUL(acc, half(src0, 0), half(src1, 0))); - insert(MACH(null, half(src0, 0), half(src1, 0))); - insert(MOV(half(inst->dst, 0), acc)); + if (brw->gen >= 7) { + fs_reg src1_0_w = inst->src[1]; + fs_reg src1_1_w = inst->src[1]; - insert(set_sechalf(MUL(acc, half(src0, 1), half(src1, 1)))); - insert(set_sechalf(MACH(null, half(src0, 1), half(src1, 1)))); - insert(set_sechalf(MOV(half(inst->dst, 1), acc))); + if (inst->src[1].file == IMM) { + src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff; + src1_1_w.fixed_hw_reg.dw1.ud >>= 16; + } else { + src1_0_w.type = BRW_REGISTER_TYPE_UW; + src1_0_w.stride = 2; + + src1_1_w.type = BRW_REGISTER_TYPE_UW; + src1_1_w.stride = 2; + src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); + } + insert(MUL(low, inst->src[0], src1_0_w)); + insert(MUL(high, inst->src[0], src1_1_w)); } else { - insert(MUL(acc, src0, src1)); - insert(MACH(null, src0, src1)); - insert(MOV(inst->dst, acc)); + fs_reg src0_0_w = inst->src[0]; + fs_reg src0_1_w = inst->src[0]; + + src0_0_w.type = BRW_REGISTER_TYPE_UW; + src0_0_w.stride = 2; + + src0_1_w.type = BRW_REGISTER_TYPE_UW; + src0_1_w.stride = 2; + src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); + + insert(MUL(low, src0_0_w, inst->src[1])); + insert(MUL(high, src0_1_w, inst->src[1])); } + + fs_reg dst = inst->dst; + dst.type = BRW_REGISTER_TYPE_UW; + dst.subreg_offset = 2; + dst.stride = 2; + + high.type = BRW_REGISTER_TYPE_UW; + high.stride = 2; + + low.type = BRW_REGISTER_TYPE_UW; + low.subreg_offset = 2; + low.stride = 2; + + insert(ADD(dst, low, high)); } #undef insert From 
421e396bb7bcc029ad457dd79064df7aeadd8e48 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 29 Apr 2015 22:20:35 -0400 Subject: [PATCH 190/834] i965: Add string for surface format to table Recommended-by: Kenneth Graunke Signed-off-by: Ben Widawsky Acked-by: Kenneth Graunke Reviewed-by: Topi Pohjolainen --- .../drivers/dri/i965/brw_surface_formats.c | 436 +++++++++--------- 1 file changed, 219 insertions(+), 217 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 016f87a4c2a..97136d0ce4f 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -39,13 +39,14 @@ struct surface_format_info { int input_vb; int streamed_output_vb; int color_processing; + const char *name; }; /* This macro allows us to write the table almost as it appears in the PRM, * while restructuring it to turn it into the C code we want. */ #define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \ - [sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color }, + [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, #sf}, #define Y 0 #define x 999 @@ -73,6 +74,7 @@ struct surface_format_info { * VB - Input Vertex Buffer * SO - Steamed Output Vertex Buffers (transform feedback) * color - Color Processing + * sf - Surface Format * * See page 88 of the Sandybridge PRM VOL4_Part1 PDF. 
* @@ -85,226 +87,226 @@ struct surface_format_info { */ const struct surface_format_info surface_formats[] = { /* smpl filt shad CK RT AB VB SO color */ - SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_UINT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_USCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64_PASSTHRU) - SF( Y, 50, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_FLOAT) - SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_SINT) - SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_UINT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_USCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32_SFIXED) - SF( Y, Y, x, x, Y, 45, Y, x, 60, BRW_SURFACEFORMAT_R16G16B16A16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_FLOAT) - SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32_FLOAT) - SF( Y, 70, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32_FLOAT_LD) - SF( Y, x, x, x, Y, x, Y, Y, 
x, BRW_SURFACEFORMAT_R32G32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_UINT) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT) - SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32A32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64_FLOAT) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16X16_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16X16_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_USCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64_PASSTHRU) - SF( Y, Y, x, Y, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_B8G8R8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB) + SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32B32A32_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_UINT) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, R64G64_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, R32G32B32X32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_USCALED) + SF( x, x, x, x, x, x, x, x, x, R32G32B32A32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU) + SF( Y, 50, x, x, 
x, x, Y, Y, x, R32G32B32_FLOAT) + SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_SINT) + SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_UINT) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32_USCALED) + SF( x, x, x, x, x, x, x, x, x, R32G32B32_SFIXED) + SF( Y, Y, x, x, Y, 45, Y, x, 60, R16G16B16A16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16B16A16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16B16A16_FLOAT) + SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32_FLOAT) + SF( Y, 70, x, x, Y, Y, Y, Y, x, R32G32_FLOAT_LD) + SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_UINT) + SF( Y, 50, Y, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT) + SF( Y, 50, x, x, x, x, x, x, x, L32A32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R32G32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R32G32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, R64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, A32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, L32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, I32X32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R32G32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R32G32_USCALED) + SF( x, x, x, x, x, x, x, x, x, R32G32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, R64_PASSTHRU) + SF( Y, Y, x, Y, Y, Y, Y, x, 60, B8G8R8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, B8G8R8A8_UNORM_SRGB) /* smpl filt shad CK RT AB VB SO color */ - SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM) - SF( Y, Y, x, x, x, x, x, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB) - SF( 
Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10A2_UINT) - SF( Y, Y, x, x, x, Y, Y, x, x, BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM) - SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB) - SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_UINT) - SF( Y, Y, x, x, Y, 45, Y, x, x, BRW_SURFACEFORMAT_R16G16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16_FLOAT) - SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R11G11B10_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_UINT) - SF( Y, 50, Y, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32_FLOAT) - SF( Y, Y, x, Y, x, x, x, x, 60, BRW_SURFACEFORMAT_B8G8R8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, 
BRW_SURFACEFORMAT_R8G8B8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10X2_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SNORM) + SF( Y, Y, x, x, Y, Y, Y, x, 60, R10G10B10A2_UNORM) + SF( Y, Y, x, x, x, x, x, x, 60, R10G10B10A2_UNORM_SRGB) + SF( Y, x, x, x, Y, x, Y, x, x, R10G10B10A2_UINT) + SF( Y, Y, x, x, x, Y, Y, x, x, R10G10B10_SNORM_A2_UNORM) + SF( Y, Y, x, x, Y, Y, Y, x, 60, R8G8B8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, R8G8B8A8_UNORM_SRGB) + SF( Y, Y, x, x, Y, 60, Y, x, x, R8G8B8A8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_UINT) + SF( Y, Y, x, x, Y, 45, Y, x, x, R16G16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R16G16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R16G16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16_FLOAT) + SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, R11G11B10_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, R32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, R32_UINT) + SF( Y, 50, Y, x, Y, Y, Y, Y, x, R32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT) + SF( Y, Y, x, x, x, x, x, x, x, L16A16_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, I24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, L24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, A24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, I32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, L32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, A32_FLOAT) + SF( Y, Y, x, Y, x, x, x, x, 60, B8G8R8X8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, 
x, x, x, R8G8B8X8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP) + SF( Y, Y, x, x, x, x, x, x, x, B10G10R10X2_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, L16A16_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R32_SNORM) /* smpl filt shad CK RT AB VB SO color */ - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10X2_USCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_USCALED) - SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8G8_UNORM) - SF( Y, Y, x, Y, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_UINT) - SF( Y, Y, Y, x, Y, 45, Y, x, 70, BRW_SURFACEFORMAT_R16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16_FLOAT) - SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE1) - SF( Y, Y, Y, x, x, x, x, x, x, 
BRW_SURFACEFORMAT_I16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_FLOAT) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM_SRGB) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R10G10B10X2_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R16G16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R16G16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R32_USCALED) + SF( Y, Y, x, Y, Y, Y, x, x, x, B5G6R5_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, B5G6R5_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, B5G5R5A1_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, B5G5R5A1_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, B4G4R4A4_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, B4G4R4A4_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, R8G8_UNORM) + SF( Y, Y, x, Y, Y, 60, Y, x, x, R8G8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R8G8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R8G8_UINT) + SF( Y, Y, Y, x, Y, 45, Y, x, 70, R16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, R16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, R16_FLOAT) + SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1) + SF( Y, Y, Y, x, x, x, x, 
x, x, I16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, L16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, A16_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, L8A8_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, I16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, L16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, A16_FLOAT) + SF(45, 45, x, x, x, x, x, x, x, L8A8_UNORM_SRGB) + SF( Y, Y, x, Y, x, x, x, x, x, R5G5_SNORM_B6_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM_SRGB) + SF( x, x, x, x, x, x, Y, x, x, R8G8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R8G8_USCALED) /* smpl filt shad CK RT AB VB SO color */ - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_USCALED) - SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A1B5G5R5_UNORM) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4B4G4R4_UNORM) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_SINT) - SF( Y, Y, x, 45, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_UINT) - SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_A8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_USCALED) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM_SRGB) - SF(45, 45, x, x, x, x, 
x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_Y8_SNORM) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_SINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_SINT) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R1_UINT) - SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_NORMAL) - SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUVY) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE1) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM_SRGB) - SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_MONO8) - SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUV) - SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPY) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB) + SF( x, x, x, x, x, x, Y, x, x, R16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R16_USCALED) + SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) + SF( x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM) + SF( x, x, x, x, x, x, x, x, x, L8A8_UINT) 
+ SF( x, x, x, x, x, x, x, x, x, L8A8_SINT) + SF( Y, Y, x, 45, Y, Y, Y, x, x, R8_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, R8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R8_UINT) + SF( Y, Y, x, Y, Y, Y, x, x, x, A8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, I8_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, L8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, P4A4_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, A4P4_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R8_USCALED) + SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, L8_UNORM_SRGB) + SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, Y8_SNORM) + SF( x, x, x, x, x, x, x, x, x, L8_UINT) + SF( x, x, x, x, x, x, x, x, x, L8_SINT) + SF( x, x, x, x, x, x, x, x, x, I8_UINT) + SF( x, x, x, x, x, x, x, x, x, I8_SINT) + SF(45, 45, x, x, x, x, x, x, x, DXT1_RGB_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, R1_UINT) + SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_NORMAL) + SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_SWAPUVY) + SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE1) + SF( Y, Y, x, Y, x, x, x, x, x, BC1_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, BC2_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, BC3_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, BC4_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, BC5_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, BC1_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, BC2_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, BC3_UNORM_SRGB) + SF( Y, x, x, x, x, x, x, x, x, MONO8) + SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPUV) + SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPY) + SF( Y, Y, x, x, x, x, x, x, x, DXT1_RGB) /* smpl filt shad CK RT AB VB SO color */ - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_FXT1) - SF( x, x, x, x, x, x, Y, x, x, 
BRW_SURFACEFORMAT_R8G8B8_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64A64_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64_FLOAT) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_SNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_SNORM) - SF(50, 50, x, x, x, x, 60, x, x, BRW_SURFACEFORMAT_R16G16B16_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_USCALED) - SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_SF16) - SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM) - SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM_SRGB) - SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_UF16) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_PLANAR_420_8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC1_RGB8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_R11) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_RG11) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_R11) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_RG11) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_SINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SNORM) - SF( x, x, x, x, x, x, x, x, x, 
BRW_SURFACEFORMAT_R10G10B10A2_USCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SSCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SNORM) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_USCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SSCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_RGBA8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_SINT) + SF( Y, Y, x, x, x, x, x, x, x, FXT1) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SNORM) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R64G64B64A64_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R64G64B64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, BC4_SNORM) + SF( Y, Y, x, x, x, x, x, x, x, BC5_SNORM) + SF(50, 50, x, x, x, x, 60, x, x, R16G16B16_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SNORM) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16_USCALED) + SF(70, 70, x, x, x, x, x, x, x, BC6H_SF16) + SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM) + SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM_SRGB) + SF(70, 70, x, x, x, x, x, x, x, BC6H_UF16) + SF( x, x, x, x, x, x, x, x, x, PLANAR_420_8) + SF( x, x, x, 
x, x, x, x, x, x, R8G8B8_UNORM_SRGB) + SF( x, x, x, x, x, x, x, x, x, ETC1_RGB8) + SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8) + SF( x, x, x, x, x, x, x, x, x, EAC_R11) + SF( x, x, x, x, x, x, x, x, x, EAC_RG11) + SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) + SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) + SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8) + SF( x, x, x, x, x, x, x, x, x, R16G16B16_UINT) + SF( x, x, x, x, x, x, x, x, x, R16G16B16_SINT) + SF( x, x, x, x, x, x, x, x, x, R32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_UINT) + SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) + SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) + SF( x, x, x, x, x, x, x, x, x, R8G8B8_UINT) + SF( x, x, x, x, x, x, x, x, x, R8G8B8_SINT) }; #undef x #undef Y From e45a292556df95280bd06ca78db901e39c0660e1 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 16 Apr 2015 09:16:19 -0700 Subject: [PATCH 191/834] i965: Add all surface types to the batch decode It's true that not all surfaces apply for every gen, but for the most part this is what we want. (The unfortunate case is when we use a valid surface, but not for the specific GEN). This was automated with a vim macro. v2: Shortened common forms such as R8G8B8A8->RGBA8. Note that this makes some of the sample output in subsequent commits slightly incorrect. v3: Use the name from the table (Ken). 
This requires declaring the surface format array as extern, and declaring the struct in the .h file. v4: Move the struct back and create a helper function to obtain the name (Ken) Get rid of the now useless helper in the state_dump.c Signed-off-by: Ben Widawsky Reviewed-by: Topi Pohjolainen (v3) Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state.h | 1 + src/mesa/drivers/dri/i965/brw_state_dump.c | 18 +++--------------- .../drivers/dri/i965/brw_surface_formats.c | 6 ++++++ 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 26fdae64ea4..bc79fb6d882 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -246,6 +246,7 @@ void brw_configure_w_tiled(const struct intel_mipmap_tree *mt, unsigned *pitch, uint32_t *tiling, unsigned *format); +const char *brw_surface_format_name(unsigned format); uint32_t brw_format_for_mesa_format(mesa_format mesa_format); GLuint translate_tex_target(GLenum target); diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 530f5a8b76e..85daf2fe558 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -31,6 +31,7 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_eu.h" +#include "brw_state.h" static void batch_out(struct brw_context *brw, const char *name, uint32_t offset, @@ -64,19 +65,6 @@ get_965_surfacetype(unsigned int surfacetype) } } -static const char * -get_965_surface_format(unsigned int surface_format) -{ - switch (surface_format) { - case 0x000: return "r32g32b32a32_float"; - case 0x0c1: return "b8g8r8a8_unorm"; - case 0x100: return "b5g6r5_unorm"; - case 0x102: return "b5g5r5a1_unorm"; - case 0x104: return "b4g4r4a4_unorm"; - default: return "unknown"; - } -} - static void dump_vs_state(struct brw_context *brw, uint32_t offset) { const char *name = 
"VS_STATE"; @@ -176,7 +164,7 @@ static void dump_surface_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 0, "%s %s\n", get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), - get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); + brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); batch_out(brw, name, offset, 1, "offset\n"); batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, @@ -200,7 +188,7 @@ static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 0, "%s %s %s\n", get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), - get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)), + brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)), (surf[0] & GEN7_SURFACE_IS_ARRAY) ? "array" : ""); batch_out(brw, name, offset, 1, "offset\n"); batch_out(brw, name, offset, 2, "%dx%d size, %d mips, %d slices\n", diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 97136d0ce4f..05016067bba 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -311,6 +311,12 @@ const struct surface_format_info surface_formats[] = { #undef x #undef Y +const char * +brw_surface_format_name(unsigned format) +{ + return surface_formats[format].name; +} + uint32_t brw_format_for_mesa_format(mesa_format mesa_format) { From 1fa0789a9452627ce9845b2fb334c995584f1ffa Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 16 Apr 2015 14:50:33 -0700 Subject: [PATCH 192/834] i965: Add viewport extents (gen8) to batch decode 0x00007da0: 0xc1da740e: SF_CLIP VP: guardband xmin = -27.306667 0x00007da4: 0x41da740e: SF_CLIP VP: guardband xmax = 27.306667 0x00007da4: 0x41da740e: SF_CLIP VP: guardband ymin = -23.405714 0x00007da8: 0xc1bb3ee7: SF_CLIP VP: guardband ymax = 23.405714 0x00007db0: 0x00000000: SF_CLIP VP: 
Min extents: 0.00x0.00 0x00007db8: 0x00000000: SF_CLIP VP: Max extents: 299.00x349.00 While here, fix the wrong offsets for the guardband (I didn't check if it used to be valid on GEN4). v2: Remove leftover GET_BITS which belongs later in the series. (Topi) Signed-off-by: Ben Widawsky Reviewed-by: Topi Pohjolainen Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 85daf2fe558..c80a75f4eef 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -308,10 +308,17 @@ static void dump_sf_clip_viewport_state(struct brw_context *brw, batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); - batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin); - batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax); - batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin); - batch_out(brw, name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax); + batch_out(brw, name, offset, 8, "guardband xmin = %f\n", vp->guardband.xmin); + batch_out(brw, name, offset, 9, "guardband xmax = %f\n", vp->guardband.xmax); + batch_out(brw, name, offset, 9, "guardband ymin = %f\n", vp->guardband.ymin); + batch_out(brw, name, offset, 10, "guardband ymax = %f\n", vp->guardband.ymax); + if (brw->gen >= 8) { + float *cc_vp = brw->batch.bo->virtual + offset; + batch_out(brw, name, offset, 12, "Min extents: %.2fx%.2f\n", + cc_vp[12], cc_vp[14]); + batch_out(brw, name, offset, 14, "Max extents: %.2fx%.2f\n", + cc_vp[13], cc_vp[15]); + } } From 7f0c7a5f90d084b1fb1e059367856436a19dca5a Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 16 Apr 2015 17:13:00 
-0700 Subject: [PATCH 193/834] i965: Add gen7+ sampler state to batch debug OLD: 0x00007e00: 0x10000000: WM SAMP0: filtering 0x00007e04: 0x000d0000: WM SAMP0: wrapping, lod 0x00007e08: 0x00000000: WM SAMP0: default color pointer 0x00007e0c: 0x00000090: WM SAMP0: chroma key, aniso NEW: 0x00007e00: 0x10000000: SAMPLER_STATE 0: Disabled = no, Base Mip: 0.0, Mip/Mag/Min Filter: NONE/NEAREST/NEAREST, LOD Bias: 0.0 0x00007e04: 0x000d0000: SAMPLER_STATE 0: Min LOD: 0.0, Max LOD: 13.0 0x00007e08: 0x00000000: SAMPLER_STATE 0: Border Color 0x00007e0c: 0x00000090: SAMPLER_STATE 0: Max aniso: RATIO 2:1, TC[XYZ] Address Control: CLAMP|CLAMP|WRAP v2: Move GET_BITS macro to here (with paren protection) Ben/Topi Add const to the sampler pointer (Topi) Signed-off-by: Ben Widawsky Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_state_dump.c | 71 +++++++++++++++++++++- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 11cb3fa490b..16cecc58d21 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -38,6 +38,7 @@ fieldval & field ## _MASK; \ }) +#define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low)) #define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) #ifndef BRW_DEFINES_H diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index c80a75f4eef..738a974bd9e 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -33,6 +33,33 @@ #include "brw_eu.h" #include "brw_state.h" +static const char *sampler_mip_filter[] = { + "NONE", + "NEAREST", + "RSVD", + "LINEAR" +}; + +static const char *sampler_mag_filter[] = { + "NEAREST", + "LINEAR", + "ANISOTROPIC", + "FLEXIBLE (GEN8+)", + "RSVD", "RSVD", + "MONO", + "RSVD" +}; + +static const char 
*sampler_addr_mode[] = { + "WRAP", + "MIRROR", + "CLAMP", + "CUBE", + "CLAMP_BORDER", + "MIRROR_ONCE", + "HALF_BORDER" +}; + static void batch_out(struct brw_context *brw, const char *name, uint32_t offset, int index, char *fmt, ...) PRINTFLIKE(5, 6); @@ -259,6 +286,45 @@ static void dump_sampler_state(struct brw_context *brw, } } +static void gen7_dump_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) +{ + const uint32_t *samp = brw->batch.bo->virtual + offset; + char name[20]; + + for (int i = 0; i < size / 16; i++) { + sprintf(name, "SAMPLER_STATE %d", i); + batch_out(brw, name, offset, i, + "Disabled = %s, Base Mip: %u.%u, Mip/Mag/Min Filter: %s/%s/%s, LOD Bias: %d.%d\n", + GET_BITS(samp[0], 31, 31) ? "yes" : "no", + GET_BITS(samp[0], 26, 23), + GET_BITS(samp[0], 22, 22), + sampler_mip_filter[GET_FIELD(samp[0], BRW_SAMPLER_MIP_FILTER)], + sampler_mag_filter[GET_FIELD(samp[0], BRW_SAMPLER_MAG_FILTER)], + /* min filter defs are the same as mag */ + sampler_mag_filter[GET_FIELD(samp[0], BRW_SAMPLER_MIN_FILTER)], + GET_BITS(samp[0], 13, 10), + GET_BITS(samp[0], 9, 1) + ); + batch_out(brw, name, offset, i+1, "Min LOD: %u.%u, Max LOD: %u.%u\n", + GET_BITS(samp[1], 31, 28), + GET_BITS(samp[1], 27, 20), + GET_BITS(samp[1], 19, 16), + GET_BITS(samp[1], 15, 8) + ); + batch_out(brw, name, offset, i+2, "Border Color\n"); /* FINISHME: gen8+ */ + batch_out(brw, name, offset, i+3, "Max aniso: RATIO %d:1, TC[XYZ] Address Control: %s|%s|%s\n", + (GET_FIELD(samp[3], BRW_SAMPLER_MAX_ANISOTROPY) + 1) * 2, + sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCX_WRAP_MODE)], + sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCY_WRAP_MODE)], + sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCZ_WRAP_MODE)] + ); + + samp += 4; + offset += 4 * sizeof(uint32_t); + } +} + static void dump_sf_viewport_state(struct brw_context *brw, uint32_t offset) { @@ -563,7 +629,10 @@ dump_state_batch(struct brw_context *brw) } break; case AUB_TRACE_SAMPLER_STATE: - 
dump_sampler_state(brw, offset, size); + if (brw->gen >= 7) + gen7_dump_sampler_state(brw, offset, size); + else + dump_sampler_state(brw, offset, size); break; case AUB_TRACE_SAMPLER_DEFAULT_COLOR: dump_sdc(brw, offset); From 313abbb8ca1f41b28c58571ca8217332d52283c7 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 16 Apr 2015 13:46:57 -0700 Subject: [PATCH 194/834] i965: Add gen8 surface state debug info AFAICT, none of the old data was wrong (the gen7 decoder), but it was missing a bunch of stuff. Adds a tick (') to denote the beginning of the surface state for easier reading. This will be replaced later with some better, but more risky code. OLD: 0x00007980: 0x23016000: SURF: 2D BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x00007984: 0x18000000: SURF: offset 0x00007988: 0x00ff00ff: SURF: 256x256 size, 0 mips, 1 slices 0x0000798c: 0x000003ff: SURF: pitch 1024, tiled 0x00007990: 0x00000000: SURF: min array element 0, array extent 1 0x00007994: 0x00000000: SURF: mip base 0 0x00007998: 0x00000000: SURF: x,y offset: 0,0 0x0000799c: 0x09770000: SURF: 0x00007940: 0x231d7000: SURF: 2D BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x00007944: 0x78000000: SURF: offset 0x00007948: 0x001f001f: SURF: 32x32 size, 0 mips, 1 slices 0x0000794c: 0x0000007f: SURF: pitch 128, tiled 0x00007950: 0x00000000: SURF: min array element 0, array extent 1 0x00007954: 0x00000000: SURF: mip base 0 0x00007958: 0x00000000: SURF: x,y offset: 0,0 0x0000795c: 0x09770000: SURF: NEW (v1): 0x00007980: 0x23016000: SURF': 2D B8G8R8A8_UNORM VALIGN4 HALIGN4 X-tiled 0x00007984: 0x18000000: SURF: MOCS: 0x18 Base MIP: 0.0 (0 mips) Surface QPitch: 0 0x00007988: 0x00ff00ff: SURF: 256x256 [AUX_NONE] 0x0000798c: 0x000003ff: SURF: 1 slices (depth), pitch: 1024 0x00007990: 0x00000000: SURF: min array element: 0, array extent 1, MULTISAMPLE_1 0x00007994: 0x00000000: SURF: x,y offset: 0,0, min LOD: 0 0x00007998: 0x00000000: SURF: AUX pitch: 0 qpitch: 0 0x0000799c: 0x09770000: SURF: Clear color: ---- 0x00007940: 0x231d7000: SURF': 2D
R8G8B8A8_UNORM VALIGN4 HALIGN4 Y-tiled 0x00007944: 0x78000000: SURF: MOCS: 0x78 Base MIP: 0 (0 mips) Surface QPitch: ff0000 0x00007948: 0x001f001f: SURF: 32x32 [AUX_NONE] 0x0000794c: 0x0000007f: SURF: 1 slices (depth), pitch: 128 0x00007950: 0x00000000: SURF: min array element: 0, array extent 1, MULTISAMPLE_1 0x00007954: 0x00000000: SURF: x,y offset: 0,0, min LOD: 0 0x00007958: 0x00000000: SURF: AUX pitch: 0 qpitch: 0 0x0000795c: 0x09770000: SURF: Clear color: ---- 0x00007920: 0x00007980: BIND0: surface state address 0x00007924: 0x00007940: BIND1: surface state address v2: Style cleanups (Matt) Fix aux mode dword 7->6 (Topi) Use exp2 instead of pow (Matt) Add dwords 8-12 to the dump v3: Needed to update the surface format name getter for the change in the first patch in the series Signed-off-by: Ben Widawsky Cc: Matt Turner Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_defines.h | 4 +- src/mesa/drivers/dri/i965/brw_state_dump.c | 83 ++++++++++++++++++++-- 2 files changed, 81 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 16cecc58d21..8fd5a492bb1 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -530,9 +530,11 @@ #define GEN7_SURFACE_ARYSPC_FULL (0 << 10) #define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10) -/* Surface state DW0 */ +/* Surface state DW1 */ #define GEN8_SURFACE_MOCS_SHIFT 24 #define GEN8_SURFACE_MOCS_MASK INTEL_MASK(30, 24) +#define GEN8_SURFACE_QPITCH_SHIFT 0 +#define GEN8_SURFACE_QPITCH_MASK INTEL_MASK(14, 0) /* Surface state DW2 */ #define BRW_SURFACE_HEIGHT_SHIFT 19 diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 738a974bd9e..155ef5902d6 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -60,6 +60,22 @@ static const char *sampler_addr_mode[] = { "HALF_BORDER" }; +static const char 
*surface_tiling[] = { + "LINEAR", + "W-tiled", + "X-tiled", + "Y-tiled" +}; + +static const char *surface_aux_mode[] = { + "AUX_NONE", + "AUX_MCS", + "AUX_APPEND", + "AUX_HIZ", + "RSVD", + "RSVD" +}; + static void batch_out(struct brw_context *brw, const char *name, uint32_t offset, int index, char *fmt, ...) PRINTFLIKE(5, 6); @@ -237,6 +253,61 @@ static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 7, "\n"); } +static float q_to_float(uint32_t data, int integer_end, int integer_start, + int fractional_end, int fractional_start) +{ + /* Convert the number to floating point. */ + float n = GET_BITS(data, integer_start, fractional_end); + + /* Multiply by 2^-n */ + return n * exp2(-(fractional_end - fractional_start + 1)); +} + +static void dump_gen8_surface_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SURF"; + uint32_t *surf = brw->batch.bo->virtual + offset; + + batch_out(brw, "SURF'", offset, 0, "%s %s %s VALIGN%d HALIGN%d %s\n", + brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), + brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)), + (surf[0] & GEN7_SURFACE_IS_ARRAY) ? 
"array" : "", + 1 << (GET_BITS(surf[0], 17, 16) + 1), /* VALIGN */ + 1 << (GET_BITS(surf[0], 15, 14) + 1), /* HALIGN */ + surface_tiling[GET_BITS(surf[0], 13, 12)]); + batch_out(brw, name, offset, 1, "MOCS: 0x%x Base MIP: %.1f (%u mips) Surface QPitch: %d\n", + GET_FIELD(surf[1], GEN8_SURFACE_MOCS), + q_to_float(surf[1], 23, 20, 19, 19), + surf[5] & INTEL_MASK(3, 0), + GET_FIELD(surf[1], GEN8_SURFACE_QPITCH) << 2); + batch_out(brw, name, offset, 2, "%dx%d [%s]\n", + GET_FIELD(surf[2], GEN7_SURFACE_WIDTH) + 1, + GET_FIELD(surf[2], GEN7_SURFACE_HEIGHT) + 1, + surface_aux_mode[surf[6] & INTEL_MASK(2, 0)]); + batch_out(brw, name, offset, 3, "%d slices (depth), pitch: %d\n", + GET_FIELD(surf[3], BRW_SURFACE_DEPTH) + 1, + (surf[3] & INTEL_MASK(17, 0)) + 1); + batch_out(brw, name, offset, 4, "min array element: %d, array extent %d, MULTISAMPLE_%d\n", + GET_FIELD(surf[4], GEN7_SURFACE_MIN_ARRAY_ELEMENT), + GET_FIELD(surf[4], GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT) + 1, + 1 << GET_BITS(surf[4], 5, 3)); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d, min LOD: %d\n", + GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), + GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET), + GET_FIELD(surf[5], GEN7_SURFACE_MIN_LOD)); + batch_out(brw, name, offset, 6, "AUX pitch: %d qpitch: %d\n", + GET_FIELD(surf[6], GEN8_SURFACE_AUX_QPITCH) << 2, + GET_FIELD(surf[6], GEN8_SURFACE_AUX_PITCH) << 2); + batch_out(brw, name, offset, 7, "Clear color: %c%c%c%c\n", + GET_BITS(surf[7], 31, 31) ? 'R' : '-', + GET_BITS(surf[7], 30, 30) ? 'G' : '-', + GET_BITS(surf[7], 29, 29) ? 'B' : '-', + GET_BITS(surf[7], 28, 28) ? 
'A' : '-'); + + for (int i = 8; i < 12; i++) + batch_out(brw, name, offset, i, "0x%08x\n", surf[i]); +} + static void dump_sdc(struct brw_context *brw, uint32_t offset) { @@ -244,7 +315,7 @@ dump_sdc(struct brw_context *brw, uint32_t offset) if (brw->gen >= 5 && brw->gen <= 6) { struct gen5_sampler_default_color *sdc = (brw->batch.bo->virtual + - offset); + offset); batch_out(brw, name, offset, 0, "unorm rgba\n"); batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]); batch_out(brw, name, offset, 2, "b %f\n", sdc->f[1]); @@ -622,11 +693,13 @@ dump_state_batch(struct brw_context *brw) dump_binding_table(brw, offset, size); break; case AUB_TRACE_SURFACE_STATE: - if (brw->gen < 7) { - dump_surface_state(brw, offset); - } else { + if (brw->gen >= 8) { + dump_gen8_surface_state(brw, offset); + } else if (brw->gen >= 7) { dump_gen7_surface_state(brw, offset); - } + } else { + dump_surface_state(brw, offset); + } break; case AUB_TRACE_SAMPLER_STATE: if (brw->gen >= 7) From c14bb072301f68b68dcc9fff3e49210cb0819912 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 15 May 2015 21:25:36 -0700 Subject: [PATCH 195/834] i965: Add Gen9 surface state decoding Gen9 surface state is very similar to the previous generation. The important changes here are aux mode, and the way clear colors work. NOTE: There are some things intentionally left out of this decoding. v2: Redo the string for the aux buffer type to address compressed variants. 
v3: Use the shift for compression enable (instead of compression mode) (Topi) Signed-off-by: Ben Widawsky Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_defines.h | 2 + src/mesa/drivers/dri/i965/brw_state.h | 13 +++-- src/mesa/drivers/dri/i965/brw_state_batch.c | 20 ++++--- src/mesa/drivers/dri/i965/brw_state_dump.c | 56 ++++++++++++------- .../drivers/dri/i965/gen8_surface_state.c | 15 ++--- 6 files changed, 68 insertions(+), 39 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 2dcc23c5fc6..abc11f63230 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1466,6 +1466,7 @@ struct brw_context uint32_t offset; uint32_t size; enum aub_state_struct_type type; + int index; } *state_batch_list; int state_batch_count; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 8fd5a492bb1..dedc3811abe 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -608,6 +608,8 @@ #define GEN8_SURFACE_AUX_MODE_HIZ 3 /* Surface state DW7 */ +#define GEN9_SURFACE_RT_COMPRESSION_SHIFT 30 +#define GEN9_SURFACE_RT_COMPRESSION_MASK INTEL_MASK(30, 30) #define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28 #define GEN7_SURFACE_SCS_R_SHIFT 25 #define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index bc79fb6d882..987672f8815 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -229,11 +229,14 @@ void brw_destroy_caches( struct brw_context *brw ); #define BRW_BATCH_STRUCT(brw, s) \ intel_batchbuffer_data(brw, (s), sizeof(*(s)), RENDER_RING) -void *brw_state_batch(struct brw_context *brw, - enum aub_state_struct_type type, - int size, - int alignment, - uint32_t *out_offset); +void *__brw_state_batch(struct 
brw_context *brw, + enum aub_state_struct_type type, + int size, + int alignment, + int index, + uint32_t *out_offset); +#define brw_state_batch(brw, type, size, alignment, out_offset) \ + __brw_state_batch(brw, type, size, alignment, 0, out_offset) /* brw_wm_surface_state.c */ void gen4_init_vtable_surface_functions(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 45dca69823f..a405a80ef6e 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -38,7 +38,8 @@ static void brw_track_state_batch(struct brw_context *brw, enum aub_state_struct_type type, uint32_t offset, - int size) + int size, + int index) { struct intel_batchbuffer *batch = &brw->batch; @@ -53,6 +54,7 @@ brw_track_state_batch(struct brw_context *brw, brw->state_batch_list[brw->state_batch_count].offset = offset; brw->state_batch_list[brw->state_batch_count].size = size; brw->state_batch_list[brw->state_batch_count].type = type; + brw->state_batch_list[brw->state_batch_count].index = index; brw->state_batch_count++; } @@ -108,18 +110,20 @@ brw_annotate_aub(struct brw_context *brw) * margin (4096 bytes, even if the object is just a 20-byte surface * state), and more buffers to walk and count for aperture size checking. * - * However, due to the restrictions inposed by the aperture size + * However, due to the restrictions imposed by the aperture size * checking performance hacks, we can't have the batch point at a * separate indirect state buffer, because once the batch points at * it, no more relocations can be added to it. So, we sneak these * buffers in at the top of the batchbuffer. 
*/ void * -brw_state_batch(struct brw_context *brw, - enum aub_state_struct_type type, - int size, - int alignment, - uint32_t *out_offset) +__brw_state_batch(struct brw_context *brw, + enum aub_state_struct_type type, + int size, + int alignment, + int index, + uint32_t *out_offset) + { struct intel_batchbuffer *batch = &brw->batch; uint32_t offset; @@ -140,7 +144,7 @@ brw_state_batch(struct brw_context *brw, batch->state_batch_offset = offset; if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_AUB))) - brw_track_state_batch(brw, type, offset, size); + brw_track_state_batch(brw, type, offset, size, index); *out_offset = offset; return batch->map + (offset>>2); diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 155ef5902d6..bce13d66a2f 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -67,15 +67,6 @@ static const char *surface_tiling[] = { "Y-tiled" }; -static const char *surface_aux_mode[] = { - "AUX_NONE", - "AUX_MCS", - "AUX_APPEND", - "AUX_HIZ", - "RSVD", - "RSVD" -}; - static void batch_out(struct brw_context *brw, const char *name, uint32_t offset, int index, char *fmt, ...) 
PRINTFLIKE(5, 6); @@ -263,12 +254,30 @@ static float q_to_float(uint32_t data, int integer_end, int integer_start, return n * exp2(-(fractional_end - fractional_start + 1)); } -static void dump_gen8_surface_state(struct brw_context *brw, uint32_t offset) +static void +dump_gen8_surface_state(struct brw_context *brw, uint32_t offset, int index) { - const char *name = "SURF"; uint32_t *surf = brw->batch.bo->virtual + offset; + int aux_mode = surf[6] & INTEL_MASK(2, 0); + const char *aux_str; + char *name; - batch_out(brw, "SURF'", offset, 0, "%s %s %s VALIGN%d HALIGN%d %s\n", + if (brw->gen >= 9 && (aux_mode == 1 || aux_mode == 5)) { + bool msrt = GET_BITS(surf[4], 5, 3) > 0; + bool compression = GET_FIELD(surf[7], GEN9_SURFACE_RT_COMPRESSION) == 1; + aux_str = ralloc_asprintf(NULL, "AUX_CCS_%c (%s, MULTISAMPLE_COUNT%c1)", + (aux_mode == 1) ? 'D' : 'E', + compression ? "Compressed RT" : "Uncompressed", + msrt ? '>' : '='); + } else { + static const char *surface_aux_mode[] = { "AUX_NONE", "AUX_MCS", + "AUX_APPEND", "AUX_HIZ", + "RSVD", "RSVD"}; + aux_str = ralloc_asprintf(NULL, "%s", surface_aux_mode[aux_mode]); + } + + name = ralloc_asprintf(NULL, "SURF%03d", index); + batch_out(brw, name, offset, 0, "%s %s %s VALIGN%d HALIGN%d %s\n", brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)), (surf[0] & GEN7_SURFACE_IS_ARRAY) ? 
"array" : "", @@ -283,7 +292,7 @@ static void dump_gen8_surface_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 2, "%dx%d [%s]\n", GET_FIELD(surf[2], GEN7_SURFACE_WIDTH) + 1, GET_FIELD(surf[2], GEN7_SURFACE_HEIGHT) + 1, - surface_aux_mode[surf[6] & INTEL_MASK(2, 0)]); + aux_str); batch_out(brw, name, offset, 3, "%d slices (depth), pitch: %d\n", GET_FIELD(surf[3], BRW_SURFACE_DEPTH) + 1, (surf[3] & INTEL_MASK(17, 0)) + 1); @@ -298,14 +307,22 @@ static void dump_gen8_surface_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 6, "AUX pitch: %d qpitch: %d\n", GET_FIELD(surf[6], GEN8_SURFACE_AUX_QPITCH) << 2, GET_FIELD(surf[6], GEN8_SURFACE_AUX_PITCH) << 2); - batch_out(brw, name, offset, 7, "Clear color: %c%c%c%c\n", - GET_BITS(surf[7], 31, 31) ? 'R' : '-', - GET_BITS(surf[7], 30, 30) ? 'G' : '-', - GET_BITS(surf[7], 29, 29) ? 'B' : '-', - GET_BITS(surf[7], 28, 28) ? 'A' : '-'); + if (brw->gen >= 9) { + batch_out(brw, name, offset, 7, "Clear color: R(%x)G(%x)B(%x)A(%x)\n", + surf[12], surf[13], surf[14], surf[15]); + } else { + batch_out(brw, name, offset, 7, "Clear color: %c%c%c%c\n", + GET_BITS(surf[7], 31, 31) ? 'R' : '-', + GET_BITS(surf[7], 30, 30) ? 'G' : '-', + GET_BITS(surf[7], 29, 29) ? 'B' : '-', + GET_BITS(surf[7], 28, 28) ? 
'A' : '-'); + } for (int i = 8; i < 12; i++) batch_out(brw, name, offset, i, "0x%08x\n", surf[i]); + + ralloc_free((void *)aux_str); + ralloc_free(name); } static void @@ -694,7 +711,8 @@ dump_state_batch(struct brw_context *brw) break; case AUB_TRACE_SURFACE_STATE: if (brw->gen >= 8) { - dump_gen8_surface_state(brw, offset); + dump_gen8_surface_state(brw, offset, + brw->state_batch_list[i].index); } else if (brw->gen >= 7) { dump_gen7_surface_state(brw, offset); } else { diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index d0c2d80b17b..672fc70a6f3 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -100,11 +100,11 @@ horizontal_alignment(const struct intel_mipmap_tree *mt) } static uint32_t * -allocate_surface_state(struct brw_context *brw, uint32_t *out_offset) +allocate_surface_state(struct brw_context *brw, uint32_t *out_offset, int index) { int dwords = brw->gen >= 9 ? 16 : 13; - uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - dwords * 4, 64, out_offset); + uint32_t *surf = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + dwords * 4, 64, index, out_offset); memset(surf, 0, dwords * 4); return surf; } @@ -120,7 +120,7 @@ gen8_emit_buffer_surface_state(struct brw_context *brw, bool rw) { const unsigned mocs = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; - uint32_t *surf = allocate_surface_state(brw, out_offset); + uint32_t *surf = allocate_surface_state(brw, out_offset, -1); surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | surface_format << BRW_SURFACE_FORMAT_SHIFT | @@ -164,6 +164,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw, struct intel_mipmap_tree *aux_mt = NULL; uint32_t aux_mode = 0; uint32_t mocs_wb = brw->gen >= 9 ? 
SKL_MOCS_WB : BDW_MOCS_WB; + int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; unsigned tiling_mode, pitch; if (mt->format == MESA_FORMAT_S_UINT8) { @@ -179,7 +180,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw, aux_mode = GEN8_SURFACE_AUX_MODE_MCS; } - uint32_t *surf = allocate_surface_state(brw, surf_offset); + uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index); surf[0] = translate_tex_target(target) << BRW_SURFACE_TYPE_SHIFT | format << BRW_SURFACE_FORMAT_SHIFT | @@ -310,7 +311,7 @@ gen8_emit_null_surface_state(struct brw_context *brw, unsigned samples, uint32_t *out_offset) { - uint32_t *surf = allocate_surface_state(brw, out_offset); + uint32_t *surf = allocate_surface_state(brw, out_offset, -1); surf[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT | @@ -392,7 +393,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, aux_mode = GEN8_SURFACE_AUX_MODE_MCS; } - uint32_t *surf = allocate_surface_state(brw, &offset); + uint32_t *surf = allocate_surface_state(brw, &offset, surf_index); surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) | (is_array ? GEN7_SURFACE_IS_ARRAY : 0) | From fa284d6f2f9a929497714c795d09e56547bccf44 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 15 May 2015 21:18:12 -0700 Subject: [PATCH 196/834] i965: Add renderbuffer surface indexes to debug This patch is optional in the series. It does make the output much cleaner, but there is some risk. 
Sample output (v3): 0x00007e80: 0x231d7000: SURF000: 2D R8G8B8A8_UNORM VALIGN4 HALIGN4 Y-tiled 0x00007e84: 0x05000000: SURF000: MOCS: 0x5 Base MIP: 0.0 (0 mips) Surface QPitch: 0 0x00007e88: 0x009f009f: SURF000: 160x160 [AUX_NONE] 0x00007e8c: 0x0000027f: SURF000: 1 slices (depth), pitch: 640 0x00007e90: 0x00000000: SURF000: min array element: 0, array extent 1, MULTISAMPLE_1 0x00007e94: 0x00000000: SURF000: x,y offset: 0,0, min LOD: 0 0x00007e98: 0x00000000: SURF000: AUX pitch: 0 qpitch: 0 0x00007e9c: 0x09770000: SURF000: Clear color: R(0)G(0)B(0)A(0) 0x00007ea0: 0x00001000: SURF000: 0x00001000 0x00007ea4: 0x00000000: SURF000: 0x00000000 0x00007ea8: 0x00000000: SURF000: 0x00000000 0x00007eac: 0x00000000: SURF000: 0x00000000 0x00007e40: 0x234df000: SURF001: 2D R11G11B10_FLOAT VALIGN4 HALIGN16 Y-tiled 0x00007e44: 0x09000000: SURF001: MOCS: 0x9 Base MIP: 0.0 (0 mips) Surface QPitch: 0 0x00007e48: 0x009f009f: SURF001: 160x160 [AUX_CCS_D (Uncompressed, MULTISAMPLE_COUNT=1)] 0x00007e4c: 0x0000027f: SURF001: 1 slices (depth), pitch: 640 0x00007e50: 0x00000000: SURF001: min array element: 0, array extent 1, MULTISAMPLE_1 0x00007e54: 0x00000000: SURF001: x,y offset: 0,0, min LOD: 0 0x00007e58: 0x00000001: SURF001: AUX pitch: 0 qpitch: 0 0x00007e5c: 0x09770000: SURF001: Clear color: R(0)G(0)B(0)A(0) 0x00007e60: 0x0002b000: SURF001: 0x0002b000 0x00007e64: 0x00000000: SURF001: 0x00000000 0x00007e68: 0x0002a000: SURF001: 0x0002a000 0x00007e6c: 0x00000000: SURF001: 0x00000000 v2: Rebased on Topi's recent series which changed around some of the gen8 surface setup code. v3: Use ralloc_asprintf instead of asprintf to be more friendly to non-GNU platforms. 
Signed-off-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_state_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index bce13d66a2f..f9e087edb6c 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -278,7 +278,7 @@ dump_gen8_surface_state(struct brw_context *brw, uint32_t offset, int index) name = ralloc_asprintf(NULL, "SURF%03d", index); batch_out(brw, name, offset, 0, "%s %s %s VALIGN%d HALIGN%d %s\n", - brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), + get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)), (surf[0] & GEN7_SURFACE_IS_ARRAY) ? "array" : "", 1 << (GET_BITS(surf[0], 17, 16) + 1), /* VALIGN */ From 8427ad91252d22db6dbd45ae37c6d5a18de8fc68 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 16 Apr 2015 19:24:36 -0700 Subject: [PATCH 197/834] i965: Add gen8 blend state OLD: 0x00007340: 0x00800000: BLEND: 0x00007344: 0x84202100: BLEND: NEW: 0x00007340: 0x00800000: BLEND: Alpha blend/test 0x00007344: 0x0000000b84202100: BLEND_ENTRY00: Color Buffer Blend factor ONE,ONE,ONE,ONE (src,dst,src alpha, dst alpha) function ADD,ADD (color, alpha), Disables: ---- 0x0000734c: 0x0000000b84202100: BLEND_ENTRY01: Color Buffer Blend factor ONE,ONE,ONE,ONE (src,dst,src alpha, dst alpha) function ADD,ADD (color, alpha), Disables: ---- 0x00007354: 0x0000000b84202100: BLEND_ENTRY02: Color Buffer Blend factor ONE,ONE,ONE,ONE (src,dst,src alpha, dst alpha) function ADD,ADD (color, alpha), Disables: ---- 0x0000735c: 0x0000000b84202100: BLEND_ENTRY03: Color Buffer Blend factor ONE,ONE,ONE,ONE (src,dst,src alpha, dst alpha) function ADD,ADD (color, alpha), Disables: ---- 0x00007364: 0x0000000b84202100: BLEND_ENTRY04: Color Buffer Blend factor ONE,ONE,ONE,ONE (src,dst,src alpha, dst alpha) function ADD,ADD (color, alpha), 
Disables: ---- 0x0000736c: 0x0000000b84202100: BLEND_ENTRY05: Color Buffer Blend factor ONE,ONE,ONE,ONE (src,dst,src alpha, dst alpha) function ADD,ADD (color, alpha), Disables: ---- 0x00007374: 0x0000000b84202100: BLEND_ENTRY06: Color Buffer Blend factor ONE,ONE,ONE,ONE (src,dst,src alpha, dst alpha) function ADD,ADD (color, alpha), Disables: ---- 0x0000737c: 0x0000000b84202100: BLEND_ENTRY07: Color Buffer Blend factor ONE,ONE,ONE,ONE (src,dst,src alpha, dst alpha) function ADD,ADD (color, alpha), Disables: ---- v2: Line length fixes, and const usage (Topi) Safer initialization of name string (Topi) Signed-off-by: Ben Widawsky Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_state_dump.c | 112 ++++++++++++++++++++- 1 file changed, 110 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index f9e087edb6c..b6f4d598e1d 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -1,5 +1,5 @@ /* - * Copyright © 2007 Intel Corporation + * Copyright © 2007-2015 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -85,6 +85,25 @@ batch_out(struct brw_context *brw, const char *name, uint32_t offset, va_end(va); } +static void +batch_out64(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) +{ + uint32_t *tmp = brw->batch.bo->virtual + offset; + + /* Swap the dwords since we want to handle this as a 64b value, but the data + * is typically emitted as dwords. 
+ */ + uint64_t data = ((uint64_t)tmp[index + 1]) << 32 | tmp[index]; + va_list va; + + fprintf(stderr, "0x%08x: 0x%016" PRIx64 ": %8s: ", + offset + index * 4, data, name); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); +} + static const char * get_965_surfacetype(unsigned int surfacetype) { @@ -546,6 +565,92 @@ static void dump_blend_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 1, "\n"); } +static void +gen8_dump_blend_state(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const uint32_t *blend = brw->batch.bo->virtual + offset; + const char *logicop[] = + { + "LOGICOP_CLEAR (BLACK)", + "LOGICOP_NOR", + "LOGICOP_AND_INVERTED", + "LOGICOP_COPY_INVERTED", + "LOGICOP_AND_REVERSE", + "LOGICOP_INVERT", + "LOGICOP_XOR", + "LOGICOP_NAND", + "LOGICOP_AND", + "LOGICOP_EQUIV", + "LOGICOP_NOOP", + "LOGICOP_OR_INVERTED", + "LOGICOP_COPY", + "LOGICOP_OR_REVERSE", + "LOGICOP_OR", + "LOGICOP_SET (WHITE)" + }; + + const char *blend_function[] = + { "ADD", "SUBTRACT", "REVERSE_SUBTRACT", "MIN", "MAX};" }; + + const char *blend_factor[0x1b] = + { + "RSVD", + "ONE", + "SRC_COLOR", "SRC_ALPHA", + "DST_ALPHA", "DST_COLOR", + "SRC_ALPHA_SATURATE", + "CONST_COLOR", "CONST_ALPHA", + "SRC1_COLOR", "SRC1_ALPHA", + "RSVD", "RSVD", "RSVD", "RSVD", "RSVD", "RSVD", + "ZERO", + "INV_SRC_COLOR", "INV_SRC_ALPHA", + "INV_DST_ALPHA", "INV_DST_COLOR", + "RSVD", + "INV_CONST_COLOR", "INV_CONST_ALPHA", + "INV_SRC1_COLOR", "INV_SRC1_ALPHA" + }; + + batch_out(brw, "BLEND", offset, 0, "Alpha blend/test\n"); + + if (((size) % 2) != 0) + fprintf(stderr, "Invalid blend state size %d\n", size); + + for (int i = 1; i < size / 4; i += 2) { + char name[sizeof("BLEND_ENTRYXXX")]; + sprintf(name, "BLEND_ENTRY%02d", (i - 1) / 2); + if (blend[i + 1] & GEN8_BLEND_LOGIC_OP_ENABLE) { + batch_out(brw, name, offset, i + 1, "%s\n", + logicop[GET_FIELD(blend[i + 1], + GEN8_BLEND_LOGIC_OP_FUNCTION)]); + } else if (blend[i] & 
GEN8_BLEND_COLOR_BUFFER_BLEND_ENABLE) { + batch_out64(brw, name, offset, i, + "\n\t\t\tColor Buffer Blend factor %s,%s,%s,%s (src,dst,src alpha, dst alpha)" + "\n\t\t\tfunction %s,%s (color, alpha), Disables: %c%c%c%c\n", + blend_factor[GET_FIELD(blend[i], + GEN8_BLEND_SRC_BLEND_FACTOR)], + blend_factor[GET_FIELD(blend[i], + GEN8_BLEND_DST_BLEND_FACTOR)], + blend_factor[GET_FIELD(blend[i], + GEN8_BLEND_SRC_ALPHA_BLEND_FACTOR)], + blend_factor[GET_FIELD(blend[i], + GEN8_BLEND_DST_ALPHA_BLEND_FACTOR)], + blend_function[GET_FIELD(blend[i], + GEN8_BLEND_COLOR_BLEND_FUNCTION)], + blend_function[GET_FIELD(blend[i], + GEN8_BLEND_ALPHA_BLEND_FUNCTION)], + blend[i] & GEN8_BLEND_WRITE_DISABLE_RED ? 'R' : '-', + blend[i] & GEN8_BLEND_WRITE_DISABLE_GREEN ? 'G' : '-', + blend[i] & GEN8_BLEND_WRITE_DISABLE_BLUE ? 'B' : '-', + blend[i] & GEN8_BLEND_WRITE_DISABLE_ALPHA ? 'A' : '-' + ); + } else if (!blend[i] && (blend[i + 1] == 0xb)) { + batch_out64(brw, name, offset, i, "NOP blend state\n"); + } else { + batch_out64(brw, name, offset, i, "????\n"); + } + } +} + static void dump_scissor(struct brw_context *brw, uint32_t offset) { @@ -704,7 +809,10 @@ dump_state_batch(struct brw_context *brw) dump_cc_state_gen4(brw, offset); break; case AUB_TRACE_BLEND_STATE: - dump_blend_state(brw, offset); + if (brw->gen >= 8) + gen8_dump_blend_state(brw, offset, size); + else + dump_blend_state(brw, offset); break; case AUB_TRACE_BINDING_TABLE: dump_binding_table(brw, offset, size); From e6f912f07e729649fb9e9e9a458482925552f778 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 8 May 2015 15:56:23 -0400 Subject: [PATCH 198/834] freedreno: fence fix A fence can outlive the ctx, so we shouldn't deref the ctx to get at the screen. We need some updates in libdrm_freedreno API to completely handle fences properly, but this is at least an improvement. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/freedreno_fence.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/freedreno_fence.c b/src/gallium/drivers/freedreno/freedreno_fence.c index 46b057d9062..375e58f7022 100644 --- a/src/gallium/drivers/freedreno/freedreno_fence.c +++ b/src/gallium/drivers/freedreno/freedreno_fence.c @@ -35,6 +35,7 @@ struct pipe_fence_handle { struct pipe_reference reference; struct fd_context *ctx; + struct fd_screen *screen; uint32_t timestamp; }; @@ -68,7 +69,7 @@ boolean fd_screen_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, uint64_t timeout) { - if (fd_pipe_wait(fence->ctx->screen->pipe, fence->timestamp)) + if (fd_pipe_wait(fence->screen->pipe, fence->timestamp)) return false; return true; @@ -86,6 +87,7 @@ struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx) pipe_reference_init(&fence->reference, 1); fence->ctx = ctx; + fence->screen = ctx->screen; fence->timestamp = fd_ringbuffer_timestamp(ctx->ring); return fence; From 42298b05d1ebd8b17b89411723ae13295643e496 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 7 May 2015 15:07:49 -0700 Subject: [PATCH 199/834] i965: Use NIR by default for vertex shaders on GEN8+ GLSL IR vs. NIR shader-db results for SIMD8 vertex shaders on Broadwell: total instructions in shared programs: 2742062 -> 2681339 (-2.21%) instructions in affected programs: 1514770 -> 1454047 (-4.01%) helped: 5813 HURT: 1120 The gained programs are ARB vertex programs that were previously going through the vec4 backend. Now that we have prog_to_nir, ARB vertex programs can go through the scalar backend so they show up as "gained" in the shader-db results.
Acked-by: Kenneth Graunke Reviewed-by: Ian Romanick Acked-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 673529a28cd..ea56859707b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -590,7 +590,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false; - if (brw_env_var_as_boolean("INTEL_USE_NIR", false)) + if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options; } From 7af2601a078047ed79c9fb81b6c4f98f22a5d049 Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Sun, 17 May 2015 08:50:43 -0500 Subject: [PATCH 200/834] mesa/driver/haiku: Drop Mesa swrast renderer This just created extra upkeep and the push to move extern C's into mesa code would mean a large number of extern's in core Mesa driver interfaces. The Haiku Gallium renderers are mostly insulated via the C-based Haiku state tracker. As any future hardware support in Haiku will be gallium-based, let's just drop swrast. Haiku has a Mesa 7.12 fork for gcc2 that uses swrast. This commit fixes the last of the Haiku build issues.
Reviewed-by: Emil Velikov --- src/mesa/Makefile.am | 1 - src/mesa/drivers/SConscript | 3 - src/mesa/drivers/haiku/swrast/SConscript | 33 - .../drivers/haiku/swrast/SoftwareRast.cpp | 697 ------------------ src/mesa/drivers/haiku/swrast/SoftwareRast.h | 95 --- .../drivers/haiku/swrast/SoftwareRast.rdef | 39 - 6 files changed, 868 deletions(-) delete mode 100644 src/mesa/drivers/haiku/swrast/SConscript delete mode 100644 src/mesa/drivers/haiku/swrast/SoftwareRast.cpp delete mode 100644 src/mesa/drivers/haiku/swrast/SoftwareRast.h delete mode 100644 src/mesa/drivers/haiku/swrast/SoftwareRast.rdef diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am index 60114e4f66a..71794b5dada 100644 --- a/src/mesa/Makefile.am +++ b/src/mesa/Makefile.am @@ -60,7 +60,6 @@ main/git_sha1.h: main/git_sha1.h.tmp include Makefile.sources EXTRA_DIST = \ - drivers/haiku \ drivers/SConscript \ main/format_info.py \ main/format_pack.py \ diff --git a/src/mesa/drivers/SConscript b/src/mesa/drivers/SConscript index db656780c0b..5d654f538be 100644 --- a/src/mesa/drivers/SConscript +++ b/src/mesa/drivers/SConscript @@ -8,6 +8,3 @@ if env['dri']: 'dri/common/xmlpool/SConscript', 'dri/common/SConscript', ]) - -if env['platform'] == 'haiku': - SConscript('haiku/swrast/SConscript') diff --git a/src/mesa/drivers/haiku/swrast/SConscript b/src/mesa/drivers/haiku/swrast/SConscript deleted file mode 100644 index 907325e3252..00000000000 --- a/src/mesa/drivers/haiku/swrast/SConscript +++ /dev/null @@ -1,33 +0,0 @@ -Import('*') - -env = env.Clone() - -env.Append(CPPPATH = [ - '#/src', - '#/src/mapi', - '#/src/mesa', - '#/src/mesa/main', - '#/include/HaikuGL', - '/boot/system/develop/headers/private', - Dir('../../../mapi'), # src/mapi build path for python-generated GL API files/headers -]) - -env.Prepend(LIBS = [ - mesautil, - glsl, - mesa, -]) - -env.Prepend(LIBS = [libgl]) - -sources = [ - 'SoftwareRast.cpp' -] - -# Disallow undefined symbols -#env.Append(SHLINKFLAGS = ['-Wl,-z,defs']) - -libswrast = 
env.SharedLibrary( - target = 'swrast', - source = sources -) diff --git a/src/mesa/drivers/haiku/swrast/SoftwareRast.cpp b/src/mesa/drivers/haiku/swrast/SoftwareRast.cpp deleted file mode 100644 index 813ad1ff27d..00000000000 --- a/src/mesa/drivers/haiku/swrast/SoftwareRast.cpp +++ /dev/null @@ -1,697 +0,0 @@ -/* - * Copyright 2006-2012, Haiku, Inc. All rights reserved. - * Distributed under the terms of the MIT License. - * - * Authors: - * Jérôme Duval, korli@users.berlios.de - * Philippe Houdoin, philippe.houdoin@free.fr - * Artur Wyszynski, harakash@gmail.com - * Alexander von Gluck, kallisti5@unixzen.com - */ - - -#include -#include "SoftwareRast.h" - -#include -#include -#include -#include -#include -#include - -extern "C" { -#include "extensions.h" -#include "drivers/common/driverfuncs.h" -#include "drivers/common/meta.h" -#include "main/api_exec.h" -#include "main/colormac.h" -#include "main/cpuinfo.h" -#include "main/buffers.h" -#include "main/formats.h" -#include "main/framebuffer.h" -#include "main/renderbuffer.h" -#include "main/version.h" -#include "main/vtxfmt.h" -#include "swrast/swrast.h" -#include "swrast/s_renderbuffer.h" -#include "swrast_setup/swrast_setup.h" -#include "tnl/tnl.h" -#include "tnl/t_context.h" -#include "tnl/t_pipeline.h" -#include "vbo/vbo.h" - - -#ifdef DEBUG -# define TRACE(x...) printf("MesaSoftwareRast: " x) -# define CALLED() printf("MesaSoftwareRast: %s\n", __PRETTY_FUNCTION__) -#else -# define TRACE(x...) -# define CALLED() -#endif - -#define ERROR(x...) 
printf("MesaSoftwareRast: " x) -} - - -extern const char* color_space_name(color_space space); - - -extern "C" _EXPORT BGLRenderer* -instantiate_gl_renderer(BGLView* view, ulong options, - BGLDispatcher* dispatcher) -{ - return new MesaSoftwareRast(view, options, dispatcher); -} - - -MesaSoftwareRast::MesaSoftwareRast(BGLView* view, ulong options, - BGLDispatcher* dispatcher) - : BGLRenderer(view, options, dispatcher), - fBitmap(NULL), - fDirectModeEnabled(false), - fInfo(NULL), - fInfoLocker("info locker"), - fVisual(NULL), - fFrameBuffer(NULL), - fFrontRenderBuffer(NULL), - fBackRenderBuffer(NULL), - fColorSpace(B_NO_COLOR_SPACE) -{ - CALLED(); - - fColorSpace = BScreen(GLView()->Window()).ColorSpace(); - - // We force single buffering for the time being - options &= ~BGL_DOUBLE; - - const GLboolean rgbFlag = ((options & BGL_INDEX) == 0); - const GLboolean alphaFlag = ((options & BGL_ALPHA) == BGL_ALPHA); - const GLboolean dblFlag = ((options & BGL_DOUBLE) == BGL_DOUBLE); - const GLboolean stereoFlag = false; - const GLint depth = (options & BGL_DEPTH) ? 16 : 0; - const GLint stencil = (options & BGL_STENCIL) ? 8 : 0; - const GLint accum = (options & BGL_ACCUM) ? 16 : 0; - const GLint red = rgbFlag ? 8 : 0; - const GLint green = rgbFlag ? 8 : 0; - const GLint blue = rgbFlag ? 8 : 0; - const GLint alpha = alphaFlag ? 8 : 0; - - fOptions = options; // | BGL_INDIRECT; - struct dd_function_table functions; - - fVisual = _mesa_create_visual(dblFlag, stereoFlag, red, green, - blue, alpha, depth, stencil, accum, accum, accum, - alpha ? 
accum : 0, 1); - - // Initialize device driver function table - _mesa_init_driver_functions(&functions); - - functions.GetString = _GetString; - functions.UpdateState = _UpdateState; - functions.MapRenderbuffer = _RenderBufferMap; - functions.Flush = _Flush; - - // create core context - // We inherit gl_context to this class - _mesa_initialize_context(this, API_OPENGL_COMPAT, fVisual, NULL, - &functions); - - /* Initialize the software rasterizer and helper modules. */ - _swrast_CreateContext(this); - _vbo_CreateContext(this); - _tnl_CreateContext(this); - _swsetup_CreateContext(this); - _swsetup_Wakeup(this); - - // Use default TCL pipeline - TNL_CONTEXT(this)->Driver.RunPipeline = _tnl_run_pipeline; - - _mesa_meta_init(this); - _mesa_enable_sw_extensions(this); - - _mesa_compute_version(this); - - _mesa_initialize_dispatch_tables(this); - _mesa_initialize_vbo_vtxfmt(this); - - // create core framebuffer - fFrameBuffer = _mesa_create_framebuffer(fVisual); - if (fFrameBuffer == NULL) { - ERROR("%s: Unable to calloc GL FrameBuffer!\n", __func__); - _mesa_destroy_visual(fVisual); - return; - } - - // Setup front render buffer - fFrontRenderBuffer = _NewRenderBuffer(true); - if (fFrontRenderBuffer == NULL) { - ERROR("%s: FrontRenderBuffer is requested but unallocated!\n", - __func__); - _mesa_destroy_visual(fVisual); - free(fFrameBuffer); - return; - } - _mesa_add_renderbuffer(fFrameBuffer, BUFFER_FRONT_LEFT, - &fFrontRenderBuffer->Base); - - // Setup back render buffer (if requested) - if (fVisual->doubleBufferMode) { - fBackRenderBuffer = _NewRenderBuffer(false); - if (fBackRenderBuffer == NULL) { - ERROR("%s: BackRenderBuffer is requested but unallocated!\n", - __func__); - _mesa_destroy_visual(fVisual); - free(fFrameBuffer); - return; - } - _mesa_add_renderbuffer(fFrameBuffer, BUFFER_BACK_LEFT, - &fBackRenderBuffer->Base); - } - - _swrast_add_soft_renderbuffers(fFrameBuffer, GL_FALSE, - fVisual->haveDepthBuffer, fVisual->haveStencilBuffer, - 
fVisual->haveAccumBuffer, alphaFlag, GL_FALSE); - - BRect bounds = view->Bounds(); - fWidth = (GLint)bounds.Width(); - fHeight = (GLint)bounds.Height(); - - // some stupid applications (Quake2) don't even think about calling LockGL() - // before using glGetString and its glGet*() friends... - // so make sure there is at least a valid context. - - if (!_mesa_get_current_context()) { - LockGL(); - // not needed, we don't have a looper yet: UnlockLooper(); - } -} - - -MesaSoftwareRast::~MesaSoftwareRast() -{ - CALLED(); - _swsetup_DestroyContext(this); - _swrast_DestroyContext(this); - _tnl_DestroyContext(this); - _vbo_DestroyContext(this); - _mesa_destroy_visual(fVisual); - _mesa_destroy_framebuffer(fFrameBuffer); - _mesa_destroy_context(this); - - free(fInfo); - free(fFrameBuffer); - - delete fBitmap; -} - - -void -MesaSoftwareRast::LockGL() -{ - CALLED(); - BGLRenderer::LockGL(); - - _mesa_make_current(this, fFrameBuffer, fFrameBuffer); - - color_space colorSpace = BScreen(GLView()->Window()).ColorSpace(); - - GLuint width = fWidth; - GLuint height = fHeight; - - BAutolock lock(fInfoLocker); - if (fDirectModeEnabled && fInfo != NULL) { - width = fInfo->window_bounds.right - - fInfo->window_bounds.left + 1; - height = fInfo->window_bounds.bottom - - fInfo->window_bounds.top + 1; - } - - if (fColorSpace != colorSpace) { - fColorSpace = colorSpace; - _SetupRenderBuffer(&fFrontRenderBuffer->Base, fColorSpace); - if (fVisual->doubleBufferMode) - _SetupRenderBuffer(&fBackRenderBuffer->Base, fColorSpace); - } - - _CheckResize(width, height); -} - - -void -MesaSoftwareRast::UnlockGL() -{ - CALLED(); - _mesa_make_current(this, NULL, NULL); - BGLRenderer::UnlockGL(); -} - - -void -MesaSoftwareRast::SwapBuffers(bool VSync) -{ - CALLED(); - - if (!fBitmap) - return; - - if (fVisual->doubleBufferMode) - _mesa_notifySwapBuffers(this); - - if (!fDirectModeEnabled || fInfo == NULL) { - if (GLView()->LockLooperWithTimeout(1000) == B_OK) { - GLView()->DrawBitmap(fBitmap, B_ORIGIN); 
- GLView()->UnlockLooper(); - } - } else { - // TODO: Here the BGLView needs to be drawlocked. - _CopyToDirect(); - } - - if (VSync) { - BScreen screen(GLView()->Window()); - screen.WaitForRetrace(); - } -} - - -void -MesaSoftwareRast::Draw(BRect updateRect) -{ - CALLED(); - if (fBitmap && (!fDirectModeEnabled || (fInfo == NULL))) - GLView()->DrawBitmap(fBitmap, updateRect, updateRect); -} - - -status_t -MesaSoftwareRast::CopyPixelsOut(BPoint location, BBitmap* bitmap) -{ - CALLED(); - color_space scs = fBitmap->ColorSpace(); - color_space dcs = bitmap->ColorSpace(); - - if (scs != dcs && (scs != B_RGBA32 || dcs != B_RGB32)) { - fprintf(stderr, "CopyPixelsOut(): incompatible color space: %s != %s\n", - color_space_name(scs), - color_space_name(dcs)); - return B_BAD_TYPE; - } - - BRect sr = fBitmap->Bounds(); - BRect dr = bitmap->Bounds(); - - sr = sr & dr.OffsetBySelf(location); - dr = sr.OffsetByCopy(-location.x, -location.y); - - uint8* ps = (uint8*)fBitmap->Bits(); - uint8* pd = (uint8*)bitmap->Bits(); - uint32* s; - uint32* d; - uint32 y; - for (y = (uint32)sr.top; y <= (uint32)sr.bottom; y++) { - s = (uint32*)(ps + y * fBitmap->BytesPerRow()); - s += (uint32)sr.left; - - d = (uint32*)(pd + (y + (uint32)(dr.top - sr.top)) - * bitmap->BytesPerRow()); - d += (uint32)dr.left; - - memcpy(d, s, dr.IntegerWidth() * 4); - } - return B_OK; -} - - -status_t -MesaSoftwareRast::CopyPixelsIn(BBitmap* bitmap, BPoint location) -{ - CALLED(); - color_space scs = bitmap->ColorSpace(); - color_space dcs = fBitmap->ColorSpace(); - - if (scs != dcs && (dcs != B_RGBA32 || scs != B_RGB32)) { - fprintf(stderr, "CopyPixelsIn(): incompatible color space: %s != %s\n", - color_space_name(scs), - color_space_name(dcs)); - return B_BAD_TYPE; - } - - BRect sr = bitmap->Bounds(); - BRect dr = fBitmap->Bounds(); - - sr = sr & dr.OffsetBySelf(location); - dr = sr.OffsetByCopy(-location.x, -location.y); - - uint8* ps = (uint8*)bitmap->Bits(); - uint8* pd = (uint8*)fBitmap->Bits(); - uint32* s; 
- uint32* d; - uint32 y; - for (y = (uint32)sr.top; y <= (uint32)sr.bottom; y++) { - s = (uint32*)(ps + y * bitmap->BytesPerRow()); - s += (uint32)sr.left; - - d = (uint32*)(pd + (y + (uint32)(dr.top - sr.top)) - * fBitmap->BytesPerRow()); - d += (uint32)dr.left; - - memcpy(d, s, dr.IntegerWidth() * 4); - } - return B_OK; -} - - -void -MesaSoftwareRast::EnableDirectMode(bool enabled) -{ - fDirectModeEnabled = enabled; -} - - -void -MesaSoftwareRast::DirectConnected(direct_buffer_info* info) -{ - // TODO: I'm not sure we need to do this: BGLView already - // keeps a local copy of the direct_buffer_info passed by - // BDirectWindow::DirectConnected(). - BAutolock lock(fInfoLocker); - if (info) { - if (!fInfo) { - fInfo = (direct_buffer_info*)malloc(DIRECT_BUFFER_INFO_AREA_SIZE); - if (!fInfo) - return; - } - memcpy(fInfo, info, DIRECT_BUFFER_INFO_AREA_SIZE); - } else if (fInfo) { - free(fInfo); - fInfo = NULL; - } -} - - -void -MesaSoftwareRast::FrameResized(float width, float height) -{ - BAutolock lock(fInfoLocker); - _CheckResize((GLuint)width, (GLuint)height); -} - - -void -MesaSoftwareRast::_CheckResize(GLuint newWidth, GLuint newHeight) -{ - CALLED(); - - if (fBitmap && newWidth == fWidth - && newHeight == fHeight) { - return; - } - - _mesa_resize_framebuffer(this, fFrameBuffer, newWidth, newHeight); - fHeight = newHeight; - fWidth = newWidth; - - _AllocateBitmap(); -} - - -void -MesaSoftwareRast::_AllocateBitmap() -{ - CALLED(); - - // allocate new size of back buffer bitmap - delete fBitmap; - fBitmap = NULL; - - if (fWidth < 1 || fHeight < 1) { - TRACE("%s: Cannot allocate bitmap < 1x1!\n", __func__); - return; - } - - BRect rect(0.0, 0.0, fWidth - 1, fHeight - 1); - fBitmap = new BBitmap(rect, fColorSpace); - - #if 0 - // Used for platform optimized drawing - for (uint i = 0; i < fHeight; i++) { - fRowAddr[fHeight - i - 1] = (GLvoid *)((GLubyte *)fBitmap->Bits() - + i * fBitmap->BytesPerRow()); - } - #endif - - fFrameBuffer->Width = fWidth; - 
fFrameBuffer->Height = fHeight; - TRACE("%s: Bitmap Size: %" B_PRIu32 "\n", __func__, fBitmap->BitsLength()); - - fFrontRenderBuffer->Buffer = (GLubyte*)fBitmap->Bits(); -} - - -// #pragma mark - static - - -const GLubyte* -MesaSoftwareRast::_GetString(gl_context* ctx, GLenum name) -{ - switch (name) { - case GL_VENDOR: - return (const GLubyte*) "Mesa Project"; - case GL_RENDERER: - return (const GLubyte*) "Software Rasterizer"; - default: - // Let core library handle all other cases - return NULL; - } -} - - -void -MesaSoftwareRast::_UpdateState(gl_context* ctx, GLuint new_state) -{ - if (!ctx) - return; - - CALLED(); - _swrast_InvalidateState(ctx, new_state); - _swsetup_InvalidateState(ctx, new_state); - _vbo_InvalidateState(ctx, new_state); - _tnl_InvalidateState(ctx, new_state); -} - - -GLboolean -MesaSoftwareRast::_RenderBufferStorage(gl_context* ctx, - struct gl_renderbuffer* render, GLenum internalFormat, - GLuint width, GLuint height) -{ - CALLED(); - - render->Width = width; - render->Height = height; - - struct swrast_renderbuffer *swRenderBuffer = swrast_renderbuffer(render); - - swRenderBuffer->RowStride = width * _mesa_get_format_bytes(render->Format); - - return GL_TRUE; -} - - -GLboolean -MesaSoftwareRast::_RenderBufferStorageMalloc(gl_context* ctx, - struct gl_renderbuffer* render, GLenum internalFormat, - GLuint width, GLuint height) -{ - CALLED(); - - render->Width = width; - render->Height = height; - - struct swrast_renderbuffer *swRenderBuffer = swrast_renderbuffer(render); - - if (swRenderBuffer != NULL) { - free(swRenderBuffer->Buffer); - swRenderBuffer->RowStride - = width * _mesa_get_format_bytes(render->Format); - - uint32 size = swRenderBuffer->RowStride * height; - TRACE("%s: Allocate %" B_PRIu32 " bytes for RenderBuffer\n", - __func__, size); - swRenderBuffer->Buffer = (GLubyte*)malloc(size); - if (!swRenderBuffer->Buffer) { - ERROR("%s: Memory allocation failure!\n", __func__); - return GL_FALSE; - } - } else { - ERROR("%s: Couldn't 
obtain software renderbuffer!\n", - __func__); - return GL_FALSE; - } - - return GL_TRUE; -} - - -void -MesaSoftwareRast::_Flush(gl_context* ctx) -{ - CALLED(); - MesaSoftwareRast* driverContext = static_cast(ctx); - - //MesaSoftwareRast* driverContext = (MesaSoftwareRast*)ctx->DriverCtx; - if ((driverContext->fOptions & BGL_DOUBLE) == 0) { - // TODO: SwapBuffers() can call _CopyToDirect(), which should - // be always called with with the BGLView drawlocked. - // This is not always the case if called from here. - driverContext->SwapBuffers(); - } -} - - -struct swrast_renderbuffer* -MesaSoftwareRast::_NewRenderBuffer(bool front) -{ - CALLED(); - struct swrast_renderbuffer *swRenderBuffer - = (struct swrast_renderbuffer*)calloc(1, sizeof *swRenderBuffer); - - if (!swRenderBuffer) { - ERROR("%s: Failed calloc RenderBuffer\n", __func__); - return NULL; - } - - _mesa_init_renderbuffer(&swRenderBuffer->Base, 0); - - swRenderBuffer->Base.ClassID = HAIKU_SWRAST_RENDERBUFFER_CLASS; - swRenderBuffer->Base.RefCount = 1; - swRenderBuffer->Base.Delete = _RenderBufferDelete; - - if (!front) - swRenderBuffer->Base.AllocStorage = _RenderBufferStorageMalloc; - else - swRenderBuffer->Base.AllocStorage = _RenderBufferStorage; - - if (_SetupRenderBuffer(&swRenderBuffer->Base, fColorSpace) != B_OK) { - free(swRenderBuffer); - return NULL; - } - - return swRenderBuffer; -} - - -status_t -MesaSoftwareRast::_SetupRenderBuffer(struct gl_renderbuffer* rb, - color_space colorSpace) -{ - CALLED(); - - rb->InternalFormat = GL_RGBA; - - switch (colorSpace) { - case B_RGBA32: - rb->_BaseFormat = GL_RGBA; - rb->Format = MESA_FORMAT_B8G8R8A8_UNORM; - break; - case B_RGB32: - rb->_BaseFormat = GL_RGB; - rb->Format = MESA_FORMAT_B8G8R8X8_UNORM; - break; - case B_RGB24: - rb->_BaseFormat = GL_RGB; - rb->Format = MESA_FORMAT_BGR_UNORM8; - break; - case B_RGB16: - rb->_BaseFormat = GL_RGB; - rb->Format = MESA_FORMAT_B5G6R5_UNORM; - break; - case B_RGB15: - rb->_BaseFormat = GL_RGB; - rb->Format = 
MESA_FORMAT_B5G5R5A1_UNORM; - break; - default: - fprintf(stderr, "Unsupported screen color space %s\n", - color_space_name(fColorSpace)); - debugger("Unsupported OpenGL color space"); - return B_ERROR; - } - return B_OK; -} - - -/*! Y inverted Map RenderBuffer function - We use a BBitmap for storage which has Y inverted. - If the Mesa provided Map function ever allows external - control of this we can omit this function. -*/ -void -MesaSoftwareRast::_RenderBufferMap(gl_context *ctx, - struct gl_renderbuffer *rb, GLuint x, GLuint y, GLuint w, GLuint h, - GLbitfield mode, GLubyte **mapOut, GLint *rowStrideOut) -{ - if (rb->ClassID == HAIKU_SWRAST_RENDERBUFFER_CLASS) { - struct swrast_renderbuffer *srb = swrast_renderbuffer(rb); - const GLuint bpp = _mesa_get_format_bytes(rb->Format); - GLint rowStride = rb->Width * bpp; // in Bytes - - y = rb->Height - y - 1; - - *rowStrideOut = -rowStride; - *mapOut = (GLubyte *) srb->Buffer + y * rowStride + x * bpp; - } else { - _swrast_map_soft_renderbuffer(ctx, rb, x, y, w, h, mode, - mapOut, rowStrideOut); - } -} - - -void -MesaSoftwareRast::_RenderBufferDelete(struct gl_context *ctx, - struct gl_renderbuffer* rb) -{ - CALLED(); - if (rb != NULL) { - struct swrast_renderbuffer *swRenderBuffer - = swrast_renderbuffer(rb); - if (swRenderBuffer != NULL) - free(swRenderBuffer->Buffer); - } - free(rb); -} - - -void -MesaSoftwareRast::_CopyToDirect() -{ - BAutolock lock(fInfoLocker); - - // check the bitmap size still matches the size - if (fInfo->window_bounds.bottom - fInfo->window_bounds.top - != fBitmap->Bounds().IntegerHeight() - || fInfo->window_bounds.right - fInfo->window_bounds.left - != fBitmap->Bounds().IntegerWidth()) - return; - - uint8 bytesPerPixel = fInfo->bits_per_pixel / 8; - uint32 bytesPerRow = fBitmap->BytesPerRow(); - for (uint32 i = 0; i < fInfo->clip_list_count; i++) { - clipping_rect *clip = &fInfo->clip_list[i]; - int32 height = clip->bottom - clip->top + 1; - int32 bytesWidth - = (clip->right - clip->left 
+ 1) * bytesPerPixel; - uint8* p = (uint8*)fInfo->bits + clip->top - * fInfo->bytes_per_row + clip->left * bytesPerPixel; - uint8* b = (uint8*)fBitmap->Bits() - + (clip->top - fInfo->window_bounds.top) * bytesPerRow - + (clip->left - fInfo->window_bounds.left) - * bytesPerPixel; - - for (int y = 0; y < height; y++) { - memcpy(p, b, bytesWidth); - p += fInfo->bytes_per_row; - b += bytesPerRow; - } - } -} diff --git a/src/mesa/drivers/haiku/swrast/SoftwareRast.h b/src/mesa/drivers/haiku/swrast/SoftwareRast.h deleted file mode 100644 index 8f0f0184863..00000000000 --- a/src/mesa/drivers/haiku/swrast/SoftwareRast.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright 2006-2012, Haiku, Inc. All rights reserved. - * Distributed under the terms of the MIT License. - * - * Authors: - * Jérôme Duval, korli@users.berlios.de - * Philippe Houdoin, philippe.houdoin@free.fr - * Artur Wyszynski, harakash@gmail.com - */ -#ifndef MESASOFTWARERENDERER_H -#define MESASOFTWARERENDERER_H - - -#define HAIKU_SWRAST_RENDERBUFFER_CLASS 0x737752 // swR - - -#include "GLRenderer.h" - -extern "C" { -#include "context.h" -#include "main/version.h" -#include "swrast/s_chan.h" -#include "swrast/s_context.h" -} - - -class MesaSoftwareRast : public BGLRenderer, public gl_context { -public: - MesaSoftwareRast(BGLView* view, - ulong bgl_options, - BGLDispatcher* dispatcher); - virtual ~MesaSoftwareRast(); - - virtual void LockGL(); - virtual void UnlockGL(); - - virtual void SwapBuffers(bool VSync = false); - virtual void Draw(BRect updateRect); - virtual status_t CopyPixelsOut(BPoint source, BBitmap* dest); - virtual status_t CopyPixelsIn(BBitmap* source, BPoint dest); - virtual void FrameResized(float width, float height); - - virtual void EnableDirectMode(bool enabled); - virtual void DirectConnected(direct_buffer_info* info); - -private: - static const GLubyte* _GetString(gl_context* ctx, GLenum name); - void _CheckResize(GLuint newWidth, GLuint newHeight); - static void _UpdateState(gl_context* 
ctx, GLuint newState); - static void _Flush(gl_context *ctx); - - struct swrast_renderbuffer* _NewRenderBuffer(bool front); - status_t _SetupRenderBuffer(struct gl_renderbuffer* rb, - color_space colorSpace); - -/* Mesa callbacks */ - static void _RenderBufferDelete(struct gl_context *ctx, - struct gl_renderbuffer* rb); - static GLboolean _RenderBufferStorage(gl_context* ctx, - struct gl_renderbuffer* render, - GLenum internalFormat, - GLuint width, GLuint height); - static GLboolean _RenderBufferStorageMalloc(gl_context* ctx, - struct gl_renderbuffer* render, - GLenum internalFormat, - GLuint width, GLuint height); - static void _RenderBufferMap(gl_context *ctx, - struct gl_renderbuffer *rb, - GLuint x, GLuint y, GLuint w, GLuint h, - GLbitfield mode, GLubyte **mapOut, - GLint *rowStrideOut); - - void _AllocateBitmap(); - void _CopyToDirect(); - - BBitmap* fBitmap; - bool fDirectModeEnabled; - direct_buffer_info* fInfo; - BLocker fInfoLocker; - ulong fOptions; - - gl_config* fVisual; - - struct gl_framebuffer* fFrameBuffer; - struct swrast_renderbuffer* fFrontRenderBuffer; - struct swrast_renderbuffer* fBackRenderBuffer; - - GLuint fWidth; - GLuint fHeight; - color_space fColorSpace; - - void* fRowAddr[SWRAST_MAX_HEIGHT]; -}; - -#endif // MESASOFTWARERENDERER_H diff --git a/src/mesa/drivers/haiku/swrast/SoftwareRast.rdef b/src/mesa/drivers/haiku/swrast/SoftwareRast.rdef deleted file mode 100644 index cb60332100c..00000000000 --- a/src/mesa/drivers/haiku/swrast/SoftwareRast.rdef +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2012, Haiku, Inc. All rights reserved. - * Distributed under the terms of the MIT License. 
- */ - -resource app_signature "application/x-vnd.Haiku-swrast"; - -resource app_version { - major = 9, - middle = 0, - minor = 0, - variety = 0, - internal = 0, - short_info = "Software Rasterizer", - long_info = "Haiku Mesa Software GL Rasterizer" -}; - -resource vector_icon { - $"6E6369660A0200140294A9FF18020014028DFFFF97058C0500020006023B10B7" - $"37F036BA1A993D466848C719BEBE2000919292FFD5D5D5020016023900000000" - $"000000003EE0004AE00048E0005EF884C702000203392E8D383001BAD97F3C12" - $"8B4786BD48B8AD0D97BBFFFF7B4168DBE9FF4168DB97020002023A0C1238D099" - $"BE44203F4BD14B38844678240DF56A7D9FE1EA064CC704016B0500090A044024" - $"2438404C5C380A044028243C40505C3C0A042438243B5C3C5C380608BFBE4D59" - $"4D59515957575659585560406044603C5E3A5C3CCB4FBFBA5E3ECA9DC11F564B" - $"584A544C504C0606AF0F2F3D2F3D393D4034BF593542324130432F42364432C0" - $"3FBC5A2F48354A2F480608AE9A22303EB5BD3AB42542B755422E412F3C29322D" - $"32223C0204263726372538263F253E263F304430443143303C313D303C02043D" - $"423D423C433D4A3C493D4A495049504A4F49474A484947060DAEAAAE014E445A" - $"3456365E325E3D5D3F5A3A5542544E4D573A4E364439463342324A2242310A0A" - $"0002020102403CA00C88888C8CC1401673C40D6544F2950A01010002403CA000" - $"0000000000401673C40D65446CF80A08020304023EC16A0000000000003EC16A" - $"45DD1844C6550A030105123EC16A0000000000003EC16A45DD1844C655011784" - $"22040A040105023EC16A0000000000003EC16A45DD1844C6550A030108123EC1" - $"6A0000000000003EC16A45DD1844C65501178422040A0503080706023EC16A00" - $"00000000003EC16A45DD1844C6550A030206071A3EC16A0000000000003EC16A" - $"45DD1844C65510FF0215810004178222040A060106023EC16A0000000000003E" - $"C16A45DD1844C6550A070107023EC16A0000000000003EC16A45DD1844C655" -}; From 0148c0ae6a6795b5decc7ed7385be37a37e18a1f Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 19 May 2015 12:15:02 +0100 Subject: [PATCH 201/834] i965: add brw_cs.h to the sources list Signed-off-by: Emil Velikov --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index a24c20aada4..ad73e6b0fa2 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -22,6 +22,7 @@ i965_FILES = \ brw_context.c \ brw_context.h \ brw_cs.cpp \ + brw_cs.h \ brw_cubemap_normalize.cpp \ brw_curbe.c \ brw_dead_control_flow.cpp \ From 0c9e0b7a6c79a32702140c91146c7267f1658cc4 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 19 May 2015 12:51:19 +0100 Subject: [PATCH 202/834] glapi: track GL_ARB_program_interface_query.xml Add the file to the API_XML list, otherwise there will be no knowledge by the build that it should be included in the tarball. Thus the (scons) build will fail. Fixes: b297fc27aa9(glapi: add GL_ARB_program_interface_query skeleton) Signed-off-by: Emil Velikov --- src/mapi/glapi/gen/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am index 4d23f825c92..adebd5c65f6 100644 --- a/src/mapi/glapi/gen/Makefile.am +++ b/src/mapi/glapi/gen/Makefile.am @@ -141,6 +141,7 @@ API_XML = \ ARB_map_buffer_range.xml \ ARB_multi_bind.xml \ ARB_pipeline_statistics_query.xml \ + ARB_program_interface_query.xml \ ARB_robustness.xml \ ARB_sample_shading.xml \ ARB_sampler_objects.xml \ From b9b516248e0441a5aa06bdeb58525b4ef8dd0001 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 19 May 2015 11:59:50 +0100 Subject: [PATCH 203/834] Post-branch version bump to 10.7.0-devel, add release notes template Signed-off-by: Emil Velikov --- VERSION | 2 +- docs/relnotes/10.7.0.html | 58 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 docs/relnotes/10.7.0.html diff --git a/VERSION b/VERSION index 8d303065022..1edd8fc00e5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -10.6.0-devel +10.7.0-devel diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html new file mode 100644 index 
00000000000..6206716e58e --- /dev/null +++ b/docs/relnotes/10.7.0.html @@ -0,0 +1,58 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 10.7.0 Release Notes / TBD

    + +

    +Mesa 10.7.0 is a new development release. +People who are concerned with stability and reliability should stick +with a previous release or wait for Mesa 10.7.1. +

    +

    +Mesa 10.7.0 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is only available if requested at context creation +because compatibility contexts are not supported. +

    + + +

    SHA256 checksums

    +
    +TBD.
    +
    + + +

    New features

    + +

    +Note: some of the new features are only available with certain drivers. +

    + +TBD. + +

    Bug fixes

    + +TBD. + +

    Changes

    + +TBD. + +
    + + From 99e583120cde8820aae94eb0f8beb723509398fc Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 19 May 2015 15:31:06 +1000 Subject: [PATCH 204/834] softpipe: move some image filter parameters into a struct This moves some of the image filter args into a struct, and passes that instead, this is prep work for adding texture gather support which needs new arguments. review: make filter args const. Reviewed-by: Brian Paul Signed-off-by: Dave Airlie --- src/gallium/drivers/softpipe/sp_tex_sample.c | 468 +++++++++---------- src/gallium/drivers/softpipe/sp_tex_sample.h | 14 +- 2 files changed, 224 insertions(+), 258 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 21fb6b00e30..aad944f4468 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1007,22 +1007,18 @@ print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZ static INLINE void img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { - unsigned xpot = pot_level_size(sp_sview->xpot, level); - unsigned ypot = pot_level_size(sp_sview->ypot, level); + unsigned xpot = pot_level_size(sp_sview->xpot, args->level); + unsigned ypot = pot_level_size(sp_sview->ypot, args->level); int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */ int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */ union tex_tile_address addr; int c; - float u = s * xpot - 0.5F; - float v = t * ypot - 0.5F; + float u = args->s * xpot - 0.5F; + float v = args->t * ypot - 0.5F; int uflr = util_ifloor(u); int vflr = util_ifloor(v); @@ -1036,7 +1032,7 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview, const float *tx[4]; addr.value = 0; - addr.bits.level = 
level; + addr.bits.level = args->level; /* Can we fetch all four at once: */ @@ -1065,21 +1061,17 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview, static INLINE void img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float rgba[TGSI_QUAD_SIZE]) { - unsigned xpot = pot_level_size(sp_sview->xpot, level); - unsigned ypot = pot_level_size(sp_sview->ypot, level); + unsigned xpot = pot_level_size(sp_sview->xpot, args->level); + unsigned ypot = pot_level_size(sp_sview->ypot, args->level); const float *out; union tex_tile_address addr; int c; - float u = s * xpot; - float v = t * ypot; + float u = args->s * xpot; + float v = args->t * ypot; int uflr = util_ifloor(u); int vflr = util_ifloor(v); @@ -1088,7 +1080,7 @@ img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview, int y0 = vflr & (ypot - 1); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; out = get_texel_2d_no_border(sp_sview, addr, x0, y0); for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1103,26 +1095,22 @@ img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview, static INLINE void img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float rgba[TGSI_QUAD_SIZE]) { - unsigned xpot = pot_level_size(sp_sview->xpot, level); - unsigned ypot = pot_level_size(sp_sview->ypot, level); + unsigned xpot = pot_level_size(sp_sview->xpot, args->level); + unsigned ypot = pot_level_size(sp_sview->ypot, args->level); union tex_tile_address addr; int c; - float u = s * xpot; - float v = t * ypot; + float u = args->s * xpot; + float v = args->t * ypot; int x0, y0; const float *out; addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; x0 = util_ifloor(u); 
if (x0 < 0) @@ -1149,11 +1137,7 @@ img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview, static void img_filter_1d_nearest(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float rgba[TGSI_QUAD_SIZE]) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1163,14 +1147,14 @@ img_filter_1d_nearest(struct sp_sampler_view *sp_sview, const float *out; int c; - width = u_minify(texture->width0, level); + width = u_minify(texture->width0, args->level); assert(width > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; - sp_samp->nearest_texcoord_s(s, width, &x); + sp_samp->nearest_texcoord_s(args->s, width, &x); out = get_texel_2d(sp_sview, sp_samp, addr, x, 0); for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1185,11 +1169,7 @@ img_filter_1d_nearest(struct sp_sampler_view *sp_sview, static void img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1199,15 +1179,15 @@ img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview, const float *out; int c; - width = u_minify(texture->width0, level); + width = u_minify(texture->width0, args->level); assert(width > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; - sp_samp->nearest_texcoord_s(s, width, &x); - layer = coord_to_layer(t, sp_sview->base.u.tex.first_layer, + sp_samp->nearest_texcoord_s(args->s, width, &x); + layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer); out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer); @@ -1223,11 +1203,7 @@ img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview, static void img_filter_2d_nearest(struct 
sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1237,17 +1213,17 @@ img_filter_2d_nearest(struct sp_sampler_view *sp_sview, const float *out; int c; - width = u_minify(texture->width0, level); - height = u_minify(texture->height0, level); + width = u_minify(texture->width0, args->level); + height = u_minify(texture->height0, args->level); assert(width > 0); assert(height > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; - sp_samp->nearest_texcoord_s(s, width, &x); - sp_samp->nearest_texcoord_t(t, height, &y); + sp_samp->nearest_texcoord_s(args->s, width, &x); + sp_samp->nearest_texcoord_t(args->t, height, &y); out = get_texel_2d(sp_sview, sp_samp, addr, x, y); for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1262,11 +1238,7 @@ img_filter_2d_nearest(struct sp_sampler_view *sp_sview, static void img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1276,18 +1248,18 @@ img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview, const float *out; int c; - width = u_minify(texture->width0, level); - height = u_minify(texture->height0, level); + width = u_minify(texture->width0, args->level); + height = u_minify(texture->height0, args->level); assert(width > 0); assert(height > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; - sp_samp->nearest_texcoord_s(s, width, &x); - sp_samp->nearest_texcoord_t(t, height, &y); - layer = coord_to_layer(p, sp_sview->base.u.tex.first_layer, + sp_samp->nearest_texcoord_s(args->s, width, &x); + sp_samp->nearest_texcoord_t(args->t, height, &y); + layer = 
coord_to_layer(args->p, sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer); out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer); @@ -1303,11 +1275,7 @@ img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview, static void img_filter_cube_nearest(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1317,29 +1285,29 @@ img_filter_cube_nearest(struct sp_sampler_view *sp_sview, const float *out; int c; - width = u_minify(texture->width0, level); - height = u_minify(texture->height0, level); + width = u_minify(texture->width0, args->level); + height = u_minify(texture->height0, args->level); assert(width > 0); assert(height > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; /* * If NEAREST filtering is done within a miplevel, always apply wrap * mode CLAMP_TO_EDGE. 
*/ if (sp_samp->base.seamless_cube_map) { - wrap_nearest_clamp_to_edge(s, width, &x); - wrap_nearest_clamp_to_edge(t, height, &y); + wrap_nearest_clamp_to_edge(args->s, width, &x); + wrap_nearest_clamp_to_edge(args->t, height, &y); } else { /* Would probably make sense to ignore mode and just do edge clamp */ - sp_samp->nearest_texcoord_s(s, width, &x); - sp_samp->nearest_texcoord_t(t, height, &y); + sp_samp->nearest_texcoord_s(args->s, width, &x); + sp_samp->nearest_texcoord_t(args->t, height, &y); } - layerface = face_id + sp_sview->base.u.tex.first_layer; + layerface = args->face_id + sp_sview->base.u.tex.first_layer; out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface); for (c = 0; c < TGSI_QUAD_SIZE; c++) rgba[TGSI_NUM_CHANNELS*c] = out[c]; @@ -1352,11 +1320,7 @@ img_filter_cube_nearest(struct sp_sampler_view *sp_sview, static void img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1366,20 +1330,20 @@ img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview, const float *out; int c; - width = u_minify(texture->width0, level); - height = u_minify(texture->height0, level); + width = u_minify(texture->width0, args->level); + height = u_minify(texture->height0, args->level); assert(width > 0); assert(height > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; - sp_samp->nearest_texcoord_s(s, width, &x); - sp_samp->nearest_texcoord_t(t, height, &y); - layerface = coord_to_layer(6 * p + sp_sview->base.u.tex.first_layer, + sp_samp->nearest_texcoord_s(args->s, width, &x); + sp_samp->nearest_texcoord_t(args->t, height, &y); + layerface = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.first_layer, - sp_sview->base.u.tex.last_layer - 5) + face_id; + 
sp_sview->base.u.tex.last_layer - 5) + args->face_id; out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface); for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1393,11 +1357,7 @@ img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview, static void img_filter_3d_nearest(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1407,20 +1367,20 @@ img_filter_3d_nearest(struct sp_sampler_view *sp_sview, const float *out; int c; - width = u_minify(texture->width0, level); - height = u_minify(texture->height0, level); - depth = u_minify(texture->depth0, level); + width = u_minify(texture->width0, args->level); + height = u_minify(texture->height0, args->level); + depth = u_minify(texture->depth0, args->level); assert(width > 0); assert(height > 0); assert(depth > 0); - sp_samp->nearest_texcoord_s(s, width, &x); - sp_samp->nearest_texcoord_t(t, height, &y); - sp_samp->nearest_texcoord_p(p, depth, &z); + sp_samp->nearest_texcoord_s(args->s, width, &x); + sp_samp->nearest_texcoord_t(args->t, height, &y); + sp_samp->nearest_texcoord_p(args->p, depth, &z); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; out = get_texel_3d(sp_sview, sp_samp, addr, x, y, z); for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1431,11 +1391,7 @@ img_filter_3d_nearest(struct sp_sampler_view *sp_sview, static void img_filter_1d_linear(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1446,14 +1402,14 @@ img_filter_1d_linear(struct sp_sampler_view *sp_sview, const float *tx0, *tx1; int c; - width = u_minify(texture->width0, level); + width = u_minify(texture->width0, args->level); 
assert(width > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; - sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw); + sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0); tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0); @@ -1467,11 +1423,7 @@ img_filter_1d_linear(struct sp_sampler_view *sp_sview, static void img_filter_1d_array_linear(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1482,15 +1434,15 @@ img_filter_1d_array_linear(struct sp_sampler_view *sp_sview, const float *tx0, *tx1; int c; - width = u_minify(texture->width0, level); + width = u_minify(texture->width0, args->level); assert(width > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; - sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw); - layer = coord_to_layer(t, sp_sview->base.u.tex.first_layer, + sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); + layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer); tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer); @@ -1505,11 +1457,7 @@ img_filter_1d_array_linear(struct sp_sampler_view *sp_sview, static void img_filter_2d_linear(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1520,17 +1468,17 @@ img_filter_2d_linear(struct sp_sampler_view *sp_sview, const float *tx0, *tx1, *tx2, *tx3; int c; - width = u_minify(texture->width0, level); - height = u_minify(texture->height0, level); + width = u_minify(texture->width0, args->level); + height = u_minify(texture->height0, 
args->level); assert(width > 0); assert(height > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; - sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw); - sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw); + sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); + sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw); tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, y0); tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, y0); @@ -1548,11 +1496,7 @@ img_filter_2d_linear(struct sp_sampler_view *sp_sview, static void img_filter_2d_array_linear(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1563,18 +1507,18 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview, const float *tx0, *tx1, *tx2, *tx3; int c; - width = u_minify(texture->width0, level); - height = u_minify(texture->height0, level); + width = u_minify(texture->width0, args->level); + height = u_minify(texture->height0, args->level); assert(width > 0); assert(height > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; - sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw); - sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw); - layer = coord_to_layer(p, sp_sview->base.u.tex.first_layer, + sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); + sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw); + layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer); tx0 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer); @@ -1593,11 +1537,7 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview, static void img_filter_cube_linear(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct 
img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1610,14 +1550,14 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview, corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE]; int c; - width = u_minify(texture->width0, level); - height = u_minify(texture->height0, level); + width = u_minify(texture->width0, args->level); + height = u_minify(texture->height0, args->level); assert(width > 0); assert(height > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; /* * For seamless if LINEAR filtering is done within a miplevel, @@ -1625,26 +1565,26 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview, */ if (sp_samp->base.seamless_cube_map) { /* Note this is a bit overkill, actual clamping is not required */ - wrap_linear_clamp_to_border(s, width, &x0, &x1, &xw); - wrap_linear_clamp_to_border(t, height, &y0, &y1, &yw); + wrap_linear_clamp_to_border(args->s, width, &x0, &x1, &xw); + wrap_linear_clamp_to_border(args->t, height, &y0, &y1, &yw); } else { /* Would probably make sense to ignore mode and just do edge clamp */ - sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw); - sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw); + sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); + sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw); } layer = sp_sview->base.u.tex.first_layer; if (sp_samp->base.seamless_cube_map) { - tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, face_id); - tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, face_id); - tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, face_id); - tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, face_id); + tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id); + tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id); + tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, 
corner2, layer, args->face_id); + tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id); } else { - tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + face_id); - tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + face_id); - tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + face_id); - tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + face_id); + tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id); + tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id); + tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id); + tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id); } /* interpolate R, G, B, A */ @@ -1658,11 +1598,7 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview, static void img_filter_cube_array_linear(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1675,14 +1611,14 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview, corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE]; int c; - width = u_minify(texture->width0, level); - height = u_minify(texture->height0, level); + width = u_minify(texture->width0, args->level); + height = u_minify(texture->height0, args->level); assert(width > 0); assert(height > 0); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; /* * For seamless if LINEAR filtering is done within a miplevel, @@ -1690,28 +1626,28 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview, */ if (sp_samp->base.seamless_cube_map) { /* Note this is a bit overkill, actual clamping is not required */ - wrap_linear_clamp_to_border(s, width, &x0, &x1, &xw); - wrap_linear_clamp_to_border(t, 
height, &y0, &y1, &yw); + wrap_linear_clamp_to_border(args->s, width, &x0, &x1, &xw); + wrap_linear_clamp_to_border(args->t, height, &y0, &y1, &yw); } else { /* Would probably make sense to ignore mode and just do edge clamp */ - sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw); - sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw); + sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); + sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw); } - layer = coord_to_layer(6 * p + sp_sview->base.u.tex.first_layer, + layer = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer - 5); if (sp_samp->base.seamless_cube_map) { - tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, face_id); - tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, face_id); - tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, face_id); - tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, face_id); + tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id); + tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id); + tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id); + tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id); } else { - tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + face_id); - tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + face_id); - tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + face_id); - tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + face_id); + tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id); + tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id); + tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id); + tx3 = 
get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id); } /* interpolate R, G, B, A */ @@ -1724,11 +1660,7 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview, static void img_filter_3d_linear(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -1739,21 +1671,20 @@ img_filter_3d_linear(struct sp_sampler_view *sp_sview, const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13; int c; - width = u_minify(texture->width0, level); - height = u_minify(texture->height0, level); - depth = u_minify(texture->depth0, level); + width = u_minify(texture->width0, args->level); + height = u_minify(texture->height0, args->level); + depth = u_minify(texture->depth0, args->level); addr.value = 0; - addr.bits.level = level; + addr.bits.level = args->level; assert(width > 0); assert(height > 0); assert(depth > 0); - sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw); - sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw); - sp_samp->linear_texcoord_p(p, depth, &z0, &z1, &zw); - + sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); + sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw); + sp_samp->linear_texcoord_p(args->p, depth, &z0, &z1, &zw); tx00 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z0); tx01 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z0); @@ -1885,30 +1816,35 @@ mip_filter_linear(struct sp_sampler_view *sp_sview, const struct pipe_sampler_view *psview = &sp_sview->base; int j; float lod[TGSI_QUAD_SIZE]; + struct img_filter_args args; compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod); for (j = 0; j < TGSI_QUAD_SIZE; j++) { int level0 = psview->u.tex.first_level + (int)lod[j]; - if (lod[j] < 0.0) - mag_filter(sp_sview, sp_samp, s[j], t[j], p[j], - psview->u.tex.first_level, - 
sp_sview->faces[j], &rgba[0][j]); - - else if (level0 >= (int) psview->u.tex.last_level) - min_filter(sp_sview, sp_samp, s[j], t[j], p[j], psview->u.tex.last_level, - sp_sview->faces[j], &rgba[0][j]); + args.s = s[j]; + args.t = t[j]; + args.p = p[j]; + args.face_id = sp_sview->faces[j]; + if (lod[j] < 0.0) { + args.level = psview->u.tex.first_level; + mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]); + } + else if (level0 >= (int) psview->u.tex.last_level) { + args.level = psview->u.tex.last_level; + min_filter(sp_sview, sp_samp, &args, &rgba[0][j]); + } else { float levelBlend = frac(lod[j]); float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; int c; - min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level0, - sp_sview->faces[j], &rgbax[0][0]); - min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level0+1, - sp_sview->faces[j], &rgbax[0][1]); + args.level = level0; + min_filter(sp_sview, sp_samp, &args, &rgbax[0][0]); + args.level = level0+1; + min_filter(sp_sview, sp_samp, &args, &rgbax[0][1]); for (c = 0; c < 4; c++) { rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]); @@ -1943,19 +1879,22 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview, const struct pipe_sampler_view *psview = &sp_sview->base; float lod[TGSI_QUAD_SIZE]; int j; - + struct img_filter_args args; compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod); for (j = 0; j < TGSI_QUAD_SIZE; j++) { - if (lod[j] < 0.0) - mag_filter(sp_sview, sp_samp, s[j], t[j], p[j], - psview->u.tex.first_level, - sp_sview->faces[j], &rgba[0][j]); - else { + args.s = s[j]; + args.t = t[j]; + args.p = p[j]; + args.face_id = sp_sview->faces[j]; + + if (lod[j] < 0.0) { + args.level = psview->u.tex.first_level; + mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]); + } else { int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F); - level = MIN2(level, (int)psview->u.tex.last_level); - min_filter(sp_sview, sp_samp, s[j], t[j], p[j], - level, sp_sview->faces[j], &rgba[0][j]); + args.level = MIN2(level, 
(int)psview->u.tex.last_level); + min_filter(sp_sview, sp_samp, &args, &rgba[0][j]); } } @@ -1980,19 +1919,21 @@ mip_filter_none(struct sp_sampler_view *sp_sview, { float lod[TGSI_QUAD_SIZE]; int j; + struct img_filter_args args; + args.level = sp_sview->base.u.tex.first_level; compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod); for (j = 0; j < TGSI_QUAD_SIZE; j++) { - if (lod[j] < 0.0) { - mag_filter(sp_sview, sp_samp, s[j], t[j], p[j], - sp_sview->base.u.tex.first_level, - sp_sview->faces[j], &rgba[0][j]); + args.s = s[j]; + args.t = t[j]; + args.p = p[j]; + args.face_id = sp_sview->faces[j]; + if (lod[j] < 0.0) { + mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]); } else { - min_filter(sp_sview, sp_samp, s[j], t[j], p[j], - sp_sview->base.u.tex.first_level, - sp_sview->faces[j], &rgba[0][j]); + min_filter(sp_sview, sp_samp, &args, &rgba[0][j]); } } } @@ -2012,11 +1953,15 @@ mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { int j; - - for (j = 0; j < TGSI_QUAD_SIZE; j++) - mag_filter(sp_sview, sp_samp, s[j], t[j], p[j], - sp_sview->base.u.tex.first_level, - sp_sview->faces[j], &rgba[0][j]); + struct img_filter_args args; + args.level = sp_sview->base.u.tex.first_level; + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + args.s = s[j]; + args.t = t[j]; + args.p = p[j]; + args.face_id = sp_sview->faces[j]; + mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]); + } } @@ -2072,7 +2017,7 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview, float scaling = 1.0f / (1 << level0); int width = u_minify(texture->width0, level0); int height = u_minify(texture->height0, level0); - + struct img_filter_args args; float ux = dudx * scaling; float vx = dvdx * scaling; float uy = dudy * scaling; @@ -2122,7 +2067,8 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview, * full, then the pixel values are read from the image. 
*/ ddq = 2 * A; - + + args.level = level; for (j = 0; j < TGSI_QUAD_SIZE; j++) { /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse * and incrementally update the value of Ax^2+Bxy*Cy^2; when this @@ -2139,6 +2085,8 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview, float num[4] = {0.0F, 0.0F, 0.0F, 0.0F}; buffer_next = 0; den = 0; + args.face_id = sp_sview->faces[j]; + U = u0 - tex_u; for (v = v0; v <= v1; ++v) { float V = v - tex_v; @@ -2170,8 +2118,10 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview, * accelerated img_filter_2d_nearest_XXX functions. */ for (jj = 0; jj < buffer_next; jj++) { - min_filter(sp_sview, sp_samp, s_buffer[jj], t_buffer[jj], p[jj], - level, sp_sview->faces[j], &rgba_temp[0][jj]); + args.s = s_buffer[jj]; + args.t = t_buffer[jj]; + args.p = p[jj]; + min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]); num[0] += weight_buffer[jj] * rgba_temp[0][jj]; num[1] += weight_buffer[jj] * rgba_temp[1][jj]; num[2] += weight_buffer[jj] * rgba_temp[2][jj]; @@ -2198,8 +2148,10 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview, * accelerated img_filter_2d_nearest_XXX functions. 
*/ for (jj = 0; jj < buffer_next; jj++) { - min_filter(sp_sview, sp_samp, s_buffer[jj], t_buffer[jj], p[jj], - level, sp_sview->faces[j], &rgba_temp[0][jj]); + args.s = s_buffer[jj]; + args.t = t_buffer[jj]; + args.p = p[jj]; + min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]); num[0] += weight_buffer[jj] * rgba_temp[0][jj]; num[1] += weight_buffer[jj] * rgba_temp[1][jj]; num[2] += weight_buffer[jj] * rgba_temp[2][jj]; @@ -2218,8 +2170,10 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview, rgba[2]=0; rgba[3]=0;*/ /* not enough pixels in resampling, resort to direct interpolation */ - min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level, - sp_sview->faces[j], &rgba_temp[0][j]); + args.s = s[j]; + args.t = t[j]; + args.p = p[j]; + min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][j]); den = 1; num[0] = rgba_temp[0][j]; num[1] = rgba_temp[1][j]; @@ -2263,7 +2217,8 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview, float dudy = (s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]) * s_to_u; float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v; float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v; - + struct img_filter_args args; + if (control == tgsi_sampler_lod_bias || control == tgsi_sampler_lod_none || /* XXX FIXME */ @@ -2322,9 +2277,14 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview, */ if (level0 >= (int) psview->u.tex.last_level) { int j; - for (j = 0; j < TGSI_QUAD_SIZE; j++) - min_filter(sp_sview, sp_samp, s[j], t[j], p[j], psview->u.tex.last_level, - sp_sview->faces[j], &rgba[0][j]); + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + args.s = s[j]; + args.t = t[j]; + args.p = p[j]; + args.level = psview->u.tex.last_level; + args.face_id = sp_sview->faces[j]; + min_filter(sp_sview, sp_samp, &args, &rgba[0][j]); + } } else { /* don't bother interpolating between multiple LODs; it doesn't @@ -2367,18 +2327,20 @@ mip_filter_linear_2d_linear_repeat_POT( for (j = 0; j < TGSI_QUAD_SIZE; j++) { int level0 = 
psview->u.tex.first_level + (int)lod[j]; - + struct img_filter_args args; /* Catches both negative and large values of level0: */ + args.s = s[j]; + args.t = t[j]; + args.p = p[j]; + args.face_id = sp_sview->faces[j]; if ((unsigned)level0 >= psview->u.tex.last_level) { if (level0 < 0) - img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j], - psview->u.tex.first_level, - sp_sview->faces[j], &rgba[0][j]); + args.level = psview->u.tex.first_level; else - img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j], - psview->u.tex.last_level, - sp_sview->faces[j], &rgba[0][j]); + args.level = psview->u.tex.last_level; + img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, + &rgba[0][j]); } else { @@ -2386,10 +2348,10 @@ mip_filter_linear_2d_linear_repeat_POT( float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; int c; - img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j], level0, - sp_sview->faces[j], &rgbax[0][0]); - img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j], level0+1, - sp_sview->faces[j], &rgbax[0][1]); + args.level = level0; + img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][0]); + args.level = level0+1; + img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][1]); for (c = 0; c < TGSI_NUM_CHANNELS; c++) rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]); diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 00a97c5186b..9ffc8794a00 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -51,13 +51,17 @@ typedef float (*compute_lambda_func)(const struct sp_sampler_view *sp_sview, const float t[TGSI_QUAD_SIZE], const float p[TGSI_QUAD_SIZE]); +struct img_filter_args { + float s; + float t; + float p; + unsigned level; + unsigned face_id; +}; + typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, - float s, - float 
t, - float p, - unsigned level, - unsigned face_id, + const struct img_filter_args *args, float *rgba); typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview, From 8bec83a30761d52088fa5cd2301b469b7aacf755 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 19 May 2015 15:39:47 +1000 Subject: [PATCH 205/834] softpipe: move control into a filter args struct more stuff for offsets and gather will go in here later. Reviewed-by: Brian Paul Signed-off-by: Dave Airlie --- src/gallium/drivers/softpipe/sp_tex_sample.c | 50 ++++++++++---------- src/gallium/drivers/softpipe/sp_tex_sample.h | 8 +++- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index aad944f4468..50d912022e1 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1810,7 +1810,7 @@ mip_filter_linear(struct sp_sampler_view *sp_sview, const float p[TGSI_QUAD_SIZE], const float c0[TGSI_QUAD_SIZE], const float lod_in[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, + const struct filter_args *filt_args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { const struct pipe_sampler_view *psview = &sp_sview->base; @@ -1818,7 +1818,7 @@ mip_filter_linear(struct sp_sampler_view *sp_sview, float lod[TGSI_QUAD_SIZE]; struct img_filter_args args; - compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod); + compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod); for (j = 0; j < TGSI_QUAD_SIZE; j++) { int level0 = psview->u.tex.first_level + (int)lod[j]; @@ -1873,14 +1873,14 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview, const float p[TGSI_QUAD_SIZE], const float c0[TGSI_QUAD_SIZE], const float lod_in[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, + const struct filter_args *filt_args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { const struct pipe_sampler_view *psview = &sp_sview->base; 
float lod[TGSI_QUAD_SIZE]; int j; struct img_filter_args args; - compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod); + compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod); for (j = 0; j < TGSI_QUAD_SIZE; j++) { args.s = s[j]; @@ -1914,7 +1914,7 @@ mip_filter_none(struct sp_sampler_view *sp_sview, const float p[TGSI_QUAD_SIZE], const float c0[TGSI_QUAD_SIZE], const float lod_in[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, + const struct filter_args *filt_args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { float lod[TGSI_QUAD_SIZE]; @@ -1922,7 +1922,7 @@ mip_filter_none(struct sp_sampler_view *sp_sview, struct img_filter_args args; args.level = sp_sview->base.u.tex.first_level; - compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod); + compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod); for (j = 0; j < TGSI_QUAD_SIZE; j++) { args.s = s[j]; @@ -1949,7 +1949,7 @@ mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview, const float p[TGSI_QUAD_SIZE], const float c0[TGSI_QUAD_SIZE], const float lod_in[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, + const struct filter_args *filt_args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { int j; @@ -2202,7 +2202,7 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview, const float p[TGSI_QUAD_SIZE], const float c0[TGSI_QUAD_SIZE], const float lod_in[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, + const struct filter_args *filt_args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { const struct pipe_resource *texture = sp_sview->base.texture; @@ -2219,10 +2219,10 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview, float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v; struct img_filter_args args; - if (control == tgsi_sampler_lod_bias || - control == tgsi_sampler_lod_none || + if (filt_args->control == tgsi_sampler_lod_bias || + filt_args->control == tgsi_sampler_lod_none || /* 
XXX FIXME */ - control == tgsi_sampler_derivs_explicit) { + filt_args->control == tgsi_sampler_derivs_explicit) { /* note: instead of working with Px and Py, we will use the * squared length instead, to avoid sqrt. */ @@ -2259,12 +2259,12 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview, * this since 0.5*log(x) = log(sqrt(x)) */ lambda = 0.5F * util_fast_log2(Pmin2) + sp_samp->base.lod_bias; - compute_lod(&sp_samp->base, control, lambda, lod_in, lod); + compute_lod(&sp_samp->base, filt_args->control, lambda, lod_in, lod); } else { - assert(control == tgsi_sampler_lod_explicit || - control == tgsi_sampler_lod_zero); - compute_lod(&sp_samp->base, control, sp_samp->base.lod_bias, lod_in, lod); + assert(filt_args->control == tgsi_sampler_lod_explicit || + filt_args->control == tgsi_sampler_lod_zero); + compute_lod(&sp_samp->base, filt_args->control, sp_samp->base.lod_bias, lod_in, lod); } /* XXX: Take into account all lod values. @@ -2316,14 +2316,14 @@ mip_filter_linear_2d_linear_repeat_POT( const float p[TGSI_QUAD_SIZE], const float c0[TGSI_QUAD_SIZE], const float lod_in[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, + const struct filter_args *filt_args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { const struct pipe_sampler_view *psview = &sp_sview->base; int j; float lod[TGSI_QUAD_SIZE]; - compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod); + compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod); for (j = 0; j < TGSI_QUAD_SIZE; j++) { int level0 = psview->u.tex.first_level + (int)lod[j]; @@ -2753,7 +2753,7 @@ sample_mip(struct sp_sampler_view *sp_sview, const float p[TGSI_QUAD_SIZE], const float c0[TGSI_QUAD_SIZE], const float lod[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, + const struct filter_args *filt_args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { mip_filter_func mip_filter; @@ -2775,10 +2775,10 @@ sample_mip(struct sp_sampler_view *sp_sview, } mip_filter(sp_sview, sp_samp, 
min_img_filter, mag_img_filter, - s, t, p, c0, lod, control, rgba); + s, t, p, c0, lod, filt_args, rgba); if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) { - sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, control, rgba); + sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba); } if (sp_sview->need_swizzle) { @@ -2802,7 +2802,7 @@ sample_cube(struct sp_sampler_view *sp_sview, const float p[TGSI_QUAD_SIZE], const float c0[TGSI_QUAD_SIZE], const float c1[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, + const struct filter_args *filt_args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { unsigned j; @@ -2880,7 +2880,7 @@ sample_cube(struct sp_sampler_view *sp_sview, } } - sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, control, rgba); + sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, filt_args, rgba); } @@ -3249,7 +3249,7 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler; - + struct filter_args filt_args; assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS); assert(sampler_index < PIPE_MAX_SAMPLERS); assert(sp_samp->sp_sampler[sampler_index]); @@ -3263,9 +3263,11 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler, } return; } + + filt_args.control = control; sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index], sp_samp->sp_sampler[sampler_index], - s, t, p, c0, lod, control, rgba); + s, t, p, c0, lod, &filt_args, rgba); } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 9ffc8794a00..35287ad3bd2 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -64,6 +64,10 @@ typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview, const struct img_filter_args *args, float *rgba); +struct filter_args { + enum tgsi_sampler_control control; 
+}; + typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, img_filter_func min_filter, @@ -73,7 +77,7 @@ typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview, const float p[TGSI_QUAD_SIZE], const float c0[TGSI_QUAD_SIZE], const float lod[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, + const struct filter_args *args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); @@ -84,7 +88,7 @@ typedef void (*filter_func)(struct sp_sampler_view *sp_sview, const float p[TGSI_QUAD_SIZE], const float c0[TGSI_QUAD_SIZE], const float lod[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, + const struct filter_args *args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); From 3f5c67d6510fe0210079ddecc0d30227a6cc4111 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 19 May 2015 16:12:45 +1000 Subject: [PATCH 206/834] softpipe: add textureOffset support. This was an oversight when GLSL1.30 was enabled, I think my misunderstanding. This fixes a bunch of tex-miplevel-selection tests under softpipe, and is required for textureGather support. I'm not sure this won't make sampling slowering, but its softpipe, correctness first and all that. 
Reviewed-by: Brian Paul Signed-off-by: Dave Airlie --- src/gallium/drivers/softpipe/sp_tex_sample.c | 155 +++++++++++-------- src/gallium/drivers/softpipe/sp_tex_sample.h | 4 + 2 files changed, 97 insertions(+), 62 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 50d912022e1..4281bdacdd5 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -131,17 +131,17 @@ repeat(int coord, unsigned size) * \param icoord returns the integer texcoords */ static void -wrap_nearest_repeat(float s, unsigned size, int *icoord) +wrap_nearest_repeat(float s, unsigned size, int offset, int *icoord) { /* s limited to [0,1) */ /* i limited to [0,size-1] */ int i = util_ifloor(s * size); - *icoord = repeat(i, size); + *icoord = repeat(i + offset, size); } static void -wrap_nearest_clamp(float s, unsigned size, int *icoord) +wrap_nearest_clamp(float s, unsigned size, int offset, int *icoord) { /* s limited to [0,1] */ /* i limited to [0,size-1] */ @@ -151,27 +151,32 @@ wrap_nearest_clamp(float s, unsigned size, int *icoord) *icoord = size - 1; else *icoord = util_ifloor(s * size); + if (offset) + *icoord = CLAMP(*icoord + offset, 0, size - 1); } static void -wrap_nearest_clamp_to_edge(float s, unsigned size, int *icoord) +wrap_nearest_clamp_to_edge(float s, unsigned size, int offset, int *icoord) { /* s limited to [min,max] */ /* i limited to [0, size-1] */ const float min = 1.0F / (2.0F * size); const float max = 1.0F - min; + if (s < min) *icoord = 0; else if (s > max) *icoord = size - 1; else *icoord = util_ifloor(s * size); + if (offset) + *icoord = CLAMP(*icoord + offset, 0, size - 1); } static void -wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord) +wrap_nearest_clamp_to_border(float s, unsigned size, int offset, int *icoord) { /* s limited to [min,max] */ /* i limited to [-1, size] */ @@ -183,11 +188,13 @@ 
wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord) *icoord = size; else *icoord = util_ifloor(s * size); + if (offset) + *icoord = CLAMP(*icoord + offset, 0, size - 1); } static void -wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord) +wrap_nearest_mirror_repeat(float s, unsigned size, int offset, int *icoord) { const float min = 1.0F / (2.0F * size); const float max = 1.0F - min; @@ -201,11 +208,13 @@ wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord) *icoord = size - 1; else *icoord = util_ifloor(u * size); + if (offset) + *icoord = CLAMP(*icoord + offset, 0, size - 1); } static void -wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord) +wrap_nearest_mirror_clamp(float s, unsigned size, int offset, int *icoord) { /* s limited to [0,1] */ /* i limited to [0,size-1] */ @@ -216,11 +225,13 @@ wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord) *icoord = size - 1; else *icoord = util_ifloor(u * size); + if (offset) + *icoord = CLAMP(*icoord + offset, 0, size - 1); } static void -wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int *icoord) +wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int offset, int *icoord) { /* s limited to [min,max] */ /* i limited to [0, size-1] */ @@ -233,11 +244,13 @@ wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int *icoord) *icoord = size - 1; else *icoord = util_ifloor(u * size); + if (offset) + *icoord = CLAMP(*icoord + offset, 0, size - 1); } static void -wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord) +wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int offset, int *icoord) { /* s limited to [min,max] */ /* i limited to [0, size-1] */ @@ -250,6 +263,8 @@ wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord) *icoord = size; else *icoord = util_ifloor(u * size); + if (offset) + *icoord = CLAMP(*icoord + offset, 0, size - 1); } @@ -264,30 +279,34 @@ wrap_nearest_mirror_clamp_to_border(float 
s, unsigned size, int *icoord) * \param icoord returns the computed integer texture coord */ static void -wrap_linear_repeat(float s, unsigned size, +wrap_linear_repeat(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { float u = s * size - 0.5F; - *icoord0 = repeat(util_ifloor(u), size); + *icoord0 = repeat(util_ifloor(u) + offset, size); *icoord1 = repeat(*icoord0 + 1, size); *w = frac(u); } static void -wrap_linear_clamp(float s, unsigned size, +wrap_linear_clamp(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { float u = CLAMP(s, 0.0F, 1.0F); u = u * size - 0.5f; *icoord0 = util_ifloor(u); *icoord1 = *icoord0 + 1; + if (offset) { + *icoord0 = CLAMP(*icoord0 + offset, 0, size - 1); + *icoord1 = CLAMP(*icoord1 + offset, 0, size - 1); + } *w = frac(u); } static void -wrap_linear_clamp_to_edge(float s, unsigned size, +wrap_linear_clamp_to_edge(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { float u = CLAMP(s, 0.0F, 1.0F); @@ -298,12 +317,16 @@ wrap_linear_clamp_to_edge(float s, unsigned size, *icoord0 = 0; if (*icoord1 >= (int) size) *icoord1 = size - 1; + if (offset) { + *icoord0 = CLAMP(*icoord0 + offset, 0, size - 1); + *icoord1 = CLAMP(*icoord1 + offset, 0, size - 1); + } *w = frac(u); } static void -wrap_linear_clamp_to_border(float s, unsigned size, +wrap_linear_clamp_to_border(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { const float min = -1.0F / (2.0F * size); @@ -317,7 +340,7 @@ wrap_linear_clamp_to_border(float s, unsigned size, static void -wrap_linear_mirror_repeat(float s, unsigned size, +wrap_linear_mirror_repeat(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { const int flr = util_ifloor(s); @@ -336,7 +359,7 @@ wrap_linear_mirror_repeat(float s, unsigned size, static void -wrap_linear_mirror_clamp(float s, unsigned size, +wrap_linear_mirror_clamp(float s, unsigned size, int offset, int *icoord0, int *icoord1, float 
*w) { float u = fabsf(s); @@ -352,7 +375,7 @@ wrap_linear_mirror_clamp(float s, unsigned size, static void -wrap_linear_mirror_clamp_to_edge(float s, unsigned size, +wrap_linear_mirror_clamp_to_edge(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { float u = fabsf(s); @@ -372,7 +395,7 @@ wrap_linear_mirror_clamp_to_edge(float s, unsigned size, static void -wrap_linear_mirror_clamp_to_border(float s, unsigned size, +wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { const float min = -1.0F / (2.0F * size); @@ -395,10 +418,10 @@ wrap_linear_mirror_clamp_to_border(float s, unsigned size, * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords. */ static void -wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord) +wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord) { int i = util_ifloor(s); - *icoord = CLAMP(i, 0, (int) size-1); + *icoord = CLAMP(i + offset, 0, (int) size-1); } @@ -406,9 +429,9 @@ wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord) * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords. */ static void -wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord) +wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int offset, int *icoord) { - *icoord = util_ifloor( CLAMP(s, -0.5F, (float) size + 0.5F) ); + *icoord = util_ifloor( CLAMP(s + offset, -0.5F, (float) size + 0.5F) ); } @@ -416,9 +439,9 @@ wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord) * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords. 
*/ static void -wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord) +wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int offset, int *icoord) { - *icoord = util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) ); + *icoord = util_ifloor( CLAMP(s + offset, 0.5F, (float) size - 0.5F) ); } @@ -426,11 +449,11 @@ wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord) * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords. */ static void -wrap_linear_unorm_clamp(float s, unsigned size, +wrap_linear_unorm_clamp(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { /* Not exactly what the spec says, but it matches NVIDIA output */ - float u = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f); + float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f); *icoord0 = util_ifloor(u); *icoord1 = *icoord0 + 1; *w = frac(u); @@ -441,10 +464,10 @@ wrap_linear_unorm_clamp(float s, unsigned size, * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords. */ static void -wrap_linear_unorm_clamp_to_border(float s, unsigned size, +wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { - float u = CLAMP(s, -0.5F, (float) size + 0.5F); + float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F); u -= 0.5F; *icoord0 = util_ifloor(u); *icoord1 = *icoord0 + 1; @@ -458,10 +481,10 @@ wrap_linear_unorm_clamp_to_border(float s, unsigned size, * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords. 
*/ static void -wrap_linear_unorm_clamp_to_edge(float s, unsigned size, +wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { - float u = CLAMP(s, +0.5F, (float) size - 0.5F); + float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F); u -= 0.5F; *icoord0 = util_ifloor(u); *icoord1 = *icoord0 + 1; @@ -1154,7 +1177,7 @@ img_filter_1d_nearest(struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; - sp_samp->nearest_texcoord_s(args->s, width, &x); + sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x); out = get_texel_2d(sp_sview, sp_samp, addr, x, 0); for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1186,7 +1209,7 @@ img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; - sp_samp->nearest_texcoord_s(args->s, width, &x); + sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x); layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer); @@ -1222,8 +1245,8 @@ img_filter_2d_nearest(struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; - sp_samp->nearest_texcoord_s(args->s, width, &x); - sp_samp->nearest_texcoord_t(args->t, height, &y); + sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x); + sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y); out = get_texel_2d(sp_sview, sp_samp, addr, x, y); for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1257,8 +1280,8 @@ img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; - sp_samp->nearest_texcoord_s(args->s, width, &x); - sp_samp->nearest_texcoord_t(args->t, height, &y); + sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x); + sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y); layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer); @@ -1299,12 +1322,12 @@ 
img_filter_cube_nearest(struct sp_sampler_view *sp_sview, * mode CLAMP_TO_EDGE. */ if (sp_samp->base.seamless_cube_map) { - wrap_nearest_clamp_to_edge(args->s, width, &x); - wrap_nearest_clamp_to_edge(args->t, height, &y); + wrap_nearest_clamp_to_edge(args->s, width, args->offset[0], &x); + wrap_nearest_clamp_to_edge(args->t, height, args->offset[1], &y); } else { /* Would probably make sense to ignore mode and just do edge clamp */ - sp_samp->nearest_texcoord_s(args->s, width, &x); - sp_samp->nearest_texcoord_t(args->t, height, &y); + sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x); + sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y); } layerface = args->face_id + sp_sview->base.u.tex.first_layer; @@ -1339,8 +1362,8 @@ img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; - sp_samp->nearest_texcoord_s(args->s, width, &x); - sp_samp->nearest_texcoord_t(args->t, height, &y); + sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x); + sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y); layerface = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer - 5) + args->face_id; @@ -1375,9 +1398,9 @@ img_filter_3d_nearest(struct sp_sampler_view *sp_sview, assert(height > 0); assert(depth > 0); - sp_samp->nearest_texcoord_s(args->s, width, &x); - sp_samp->nearest_texcoord_t(args->t, height, &y); - sp_samp->nearest_texcoord_p(args->p, depth, &z); + sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x); + sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y); + sp_samp->nearest_texcoord_p(args->p, depth, args->offset[2], &z); addr.value = 0; addr.bits.level = args->level; @@ -1409,7 +1432,7 @@ img_filter_1d_linear(struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; - sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); + 
sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0); tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0); @@ -1441,7 +1464,7 @@ img_filter_1d_array_linear(struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; - sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); + sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer); @@ -1477,8 +1500,8 @@ img_filter_2d_linear(struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; - sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); - sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw); + sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); + sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw); tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, y0); tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, y0); @@ -1516,8 +1539,8 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; - sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); - sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw); + sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); + sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw); layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer); @@ -1565,12 +1588,12 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview, */ if (sp_samp->base.seamless_cube_map) { /* Note this is a bit overkill, actual clamping is not required */ - wrap_linear_clamp_to_border(args->s, width, &x0, &x1, &xw); - wrap_linear_clamp_to_border(args->t, height, &y0, &y1, &yw); + wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw); + 
wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw); } else { /* Would probably make sense to ignore mode and just do edge clamp */ - sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); - sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw); + sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); + sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw); } layer = sp_sview->base.u.tex.first_layer; @@ -1626,12 +1649,12 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview, */ if (sp_samp->base.seamless_cube_map) { /* Note this is a bit overkill, actual clamping is not required */ - wrap_linear_clamp_to_border(args->s, width, &x0, &x1, &xw); - wrap_linear_clamp_to_border(args->t, height, &y0, &y1, &yw); + wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw); + wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw); } else { /* Would probably make sense to ignore mode and just do edge clamp */ - sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); - sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw); + sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); + sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw); } layer = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer, @@ -1682,9 +1705,9 @@ img_filter_3d_linear(struct sp_sampler_view *sp_sview, assert(height > 0); assert(depth > 0); - sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw); - sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw); - sp_samp->linear_texcoord_p(args->p, depth, &z0, &z1, &zw); + sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); + sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw); + sp_samp->linear_texcoord_p(args->p, depth, args->offset[2], &z0, &z1, &zw); tx00 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z0); tx01 = 
get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z0); @@ -1820,6 +1843,8 @@ mip_filter_linear(struct sp_sampler_view *sp_sview, compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod); + args.offset = filt_args->offset; + for (j = 0; j < TGSI_QUAD_SIZE; j++) { int level0 = psview->u.tex.first_level + (int)lod[j]; @@ -1880,6 +1905,8 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview, float lod[TGSI_QUAD_SIZE]; int j; struct img_filter_args args; + + args.offset = filt_args->offset; compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod); for (j = 0; j < TGSI_QUAD_SIZE; j++) { @@ -1922,6 +1949,8 @@ mip_filter_none(struct sp_sampler_view *sp_sview, struct img_filter_args args; args.level = sp_sview->base.u.tex.first_level; + args.offset = filt_args->offset; + compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod); for (j = 0; j < TGSI_QUAD_SIZE; j++) { @@ -1955,6 +1984,7 @@ mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview, int j; struct img_filter_args args; args.level = sp_sview->base.u.tex.first_level; + args.offset = filt_args->offset; for (j = 0; j < TGSI_QUAD_SIZE; j++) { args.s = s[j]; args.t = t[j]; @@ -3265,6 +3295,7 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler, } filt_args.control = control; + filt_args.offset = offset; sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index], sp_samp->sp_sampler[sampler_index], s, t, p, c0, lod, &filt_args, rgba); diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 35287ad3bd2..c9de56c5ab4 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -38,10 +38,12 @@ struct sp_sampler; typedef void (*wrap_nearest_func)(float s, unsigned size, + int offset, int *icoord); typedef void (*wrap_linear_func)(float s, unsigned size, + int offset, int *icoord0, int *icoord1, float *w); @@ -57,6 +59,7 @@ 
struct img_filter_args { float p; unsigned level; unsigned face_id; + const int8_t *offset; }; typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview, @@ -66,6 +69,7 @@ typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview, struct filter_args { enum tgsi_sampler_control control; + const int8_t *offset; }; typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview, From a6861ecfc91973ba97989def97dd571e0e096888 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 19 May 2015 16:16:07 +1000 Subject: [PATCH 207/834] tgsi: handle TG4 opcode in tgsi exec This just adds a new modifier interface for drivers to implement. Reviewed-by: Brian Paul Signed-off-by: Dave Airlie --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 11 ++++++++++- src/gallium/auxiliary/tgsi/tgsi_exec.h | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index d9e40506afa..6512e80ba2e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1952,7 +1952,7 @@ fetch_texel( struct tgsi_sampler *sampler, #define TEX_MODIFIER_LOD_BIAS 2 #define TEX_MODIFIER_EXPLICIT_LOD 3 #define TEX_MODIFIER_LEVEL_ZERO 4 - +#define TEX_MODIFIER_GATHER 5 /* * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array. 
@@ -2069,6 +2069,8 @@ exec_tex(struct tgsi_exec_machine *mach, control = tgsi_sampler_lod_explicit; else if (modifier == TEX_MODIFIER_LOD_BIAS) control = tgsi_sampler_lod_bias; + else if (modifier == TEX_MODIFIER_GATHER) + control = tgsi_sampler_gather; } else { for (i = dim; i < Elements(args); i++) @@ -4374,6 +4376,13 @@ exec_instruction( exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); break; + case TGSI_OPCODE_TG4: + /* src[0] = texcoord */ + /* src[1] = component */ + /* src[2] = sampler unit */ + exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); + break; + case TGSI_OPCODE_UP2H: assert (0); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 0e59b884897..0f4c966cc11 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -93,7 +93,8 @@ enum tgsi_sampler_control { tgsi_sampler_lod_bias, tgsi_sampler_lod_explicit, tgsi_sampler_lod_zero, - tgsi_sampler_derivs_explicit + tgsi_sampler_derivs_explicit, + tgsi_sampler_gather, }; /** From 0108eae2911d2fc8f2ae0ef0fc6fc503fbfc600d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 19 May 2015 16:22:35 +1000 Subject: [PATCH 208/834] softpipe: use arrays to make gather easier This is a prep change for gather, and it makes more sense to use an array in these cases. 
Reviewed-by: Brian Paul Signed-off-by: Dave Airlie --- src/gallium/drivers/softpipe/sp_tex_sample.c | 72 ++++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 4281bdacdd5..450234cbfa3 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1488,7 +1488,7 @@ img_filter_2d_linear(struct sp_sampler_view *sp_sview, int x0, y0, x1, y1; float xw, yw; /* weights */ union tex_tile_address addr; - const float *tx0, *tx1, *tx2, *tx3; + const float *tx[4]; int c; width = u_minify(texture->width0, args->level); @@ -1503,16 +1503,16 @@ img_filter_2d_linear(struct sp_sampler_view *sp_sview, sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw); - tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, y0); - tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, y0); - tx2 = get_texel_2d(sp_sview, sp_samp, addr, x0, y1); - tx3 = get_texel_2d(sp_sview, sp_samp, addr, x1, y1); + tx[0] = get_texel_2d(sp_sview, sp_samp, addr, x0, y0); + tx[1] = get_texel_2d(sp_sview, sp_samp, addr, x1, y0); + tx[2] = get_texel_2d(sp_sview, sp_samp, addr, x0, y1); + tx[3] = get_texel_2d(sp_sview, sp_samp, addr, x1, y1); /* interpolate R, G, B, A */ for (c = 0; c < TGSI_QUAD_SIZE; c++) rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, - tx0[c], tx1[c], - tx2[c], tx3[c]); + tx[0][c], tx[1][c], + tx[2][c], tx[3][c]); } @@ -1527,7 +1527,7 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview, int x0, y0, x1, y1, layer; float xw, yw; /* weights */ union tex_tile_address addr; - const float *tx0, *tx1, *tx2, *tx3; + const float *tx[4]; int c; width = u_minify(texture->width0, args->level); @@ -1544,16 +1544,16 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview, layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer, 
sp_sview->base.u.tex.last_layer); - tx0 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer); - tx1 = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer); - tx2 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer); - tx3 = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer); + tx[0] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer); + tx[1] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer); + tx[2] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer); + tx[3] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer); /* interpolate R, G, B, A */ for (c = 0; c < TGSI_QUAD_SIZE; c++) rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, - tx0[c], tx1[c], - tx2[c], tx3[c]); + tx[0][c], tx[1][c], + tx[2][c], tx[3][c]); } @@ -1568,7 +1568,7 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview, int x0, y0, x1, y1, layer; float xw, yw; /* weights */ union tex_tile_address addr; - const float *tx0, *tx1, *tx2, *tx3; + const float *tx[4]; float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE], corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE]; int c; @@ -1599,22 +1599,22 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview, layer = sp_sview->base.u.tex.first_layer; if (sp_samp->base.seamless_cube_map) { - tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id); - tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id); - tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id); - tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id); + tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id); + tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id); + tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id); + tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id); } else { 
- tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id); - tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id); - tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id); - tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id); + tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id); + tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id); + tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id); + tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id); } /* interpolate R, G, B, A */ for (c = 0; c < TGSI_QUAD_SIZE; c++) rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, - tx0[c], tx1[c], - tx2[c], tx3[c]); + tx[0][c], tx[1][c], + tx[2][c], tx[3][c]); } @@ -1629,7 +1629,7 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview, int x0, y0, x1, y1, layer; float xw, yw; /* weights */ union tex_tile_address addr; - const float *tx0, *tx1, *tx2, *tx3; + const float *tx[4]; float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE], corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE]; int c; @@ -1662,22 +1662,22 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview, sp_sview->base.u.tex.last_layer - 5); if (sp_samp->base.seamless_cube_map) { - tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id); - tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id); - tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id); - tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id); + tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id); + tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id); + tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, 
layer, args->face_id); + tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id); } else { - tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id); - tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id); - tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id); - tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id); + tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id); + tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id); + tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id); + tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id); } /* interpolate R, G, B, A */ for (c = 0; c < TGSI_QUAD_SIZE; c++) rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, - tx0[c], tx1[c], - tx2[c], tx3[c]); + tx[0][c], tx[1][c], + tx[2][c], tx[3][c]); } static void From 55a7b5165d40b831fd303079f8f80962d195d6ee Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 19 May 2015 16:29:39 +1000 Subject: [PATCH 209/834] softpipe: start adding gather support (v2) This adds both ARB_texture_gather and the enhanced gather for ARB_gpu_shader5. This passes all the piglit tests, it relies on the GLSL lowering pass to make textureGatherOffsets work. 
v2: use inline to get gather component (Brian) fix function name, add asserts (Brian) Reviewed-by: Brian Paul Signed-off-by: Dave Airlie --- src/gallium/drivers/softpipe/sp_screen.c | 5 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 286 +++++++++++++------ src/gallium/drivers/softpipe/sp_tex_sample.h | 2 + 3 files changed, 210 insertions(+), 83 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index b3bc1773e9f..a688d319bb8 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -191,7 +191,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_ENDIANNESS: return PIPE_ENDIAN_NATIVE; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + return 4; case PIPE_CAP_TEXTURE_GATHER_SM5: + return 1; case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: @@ -206,8 +208,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_FAKE_SW_MSAA: return 1; case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: + return -32; case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: - return 0; + return 31; case PIPE_CAP_DRAW_INDIRECT: return 1; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 450234cbfa3..4ac349807e5 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1476,6 +1476,72 @@ img_filter_1d_array_linear(struct sp_sampler_view *sp_sview, rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]); } +/* + * Retrieve the gathered value, need to convert to the + * TGSI expected interface, and take component select + * and swizzling into account. 
+ */ +static float +get_gather_value(const struct sp_sampler_view *sp_sview, + int chan_in, int comp_sel, + const float *tx[4]) +{ + int chan; + unsigned swizzle; + + /* + * softpipe samples in a different order + * to TGSI expects, so we need to swizzle, + * the samples into the correct slots. + */ + switch (chan_in) { + case 0: + chan = 2; + break; + case 1: + chan = 3; + break; + case 2: + chan = 1; + break; + case 3: + chan = 0; + break; + default: + assert(0); + return 0.0; + } + + /* pick which component to use for the swizzle */ + switch (comp_sel) { + case 0: + swizzle = sp_sview->base.swizzle_r; + break; + case 1: + swizzle = sp_sview->base.swizzle_g; + break; + case 2: + swizzle = sp_sview->base.swizzle_b; + break; + case 3: + swizzle = sp_sview->base.swizzle_a; + break; + default: + assert(0); + return 0.0; + } + + /* get correct result using the channel and swizzle */ + switch (swizzle) { + case PIPE_SWIZZLE_ZERO: + return 0.0; + case PIPE_SWIZZLE_ONE: + return 1.0; + default: + return tx[chan][swizzle]; + } +} + static void img_filter_2d_linear(struct sp_sampler_view *sp_sview, @@ -1508,11 +1574,18 @@ img_filter_2d_linear(struct sp_sampler_view *sp_sview, tx[2] = get_texel_2d(sp_sview, sp_samp, addr, x0, y1); tx[3] = get_texel_2d(sp_sview, sp_samp, addr, x1, y1); - /* interpolate R, G, B, A */ - for (c = 0; c < TGSI_QUAD_SIZE; c++) - rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, - tx[0][c], tx[1][c], - tx[2][c], tx[3][c]); + if (args->gather_only) { + for (c = 0; c < TGSI_QUAD_SIZE; c++) + rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c, + args->gather_comp, + tx); + } else { + /* interpolate R, G, B, A */ + for (c = 0; c < TGSI_QUAD_SIZE; c++) + rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, + tx[0][c], tx[1][c], + tx[2][c], tx[3][c]); + } } @@ -1549,11 +1622,18 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview, tx[2] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer); tx[3] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, 
y1, layer); - /* interpolate R, G, B, A */ - for (c = 0; c < TGSI_QUAD_SIZE; c++) - rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, - tx[0][c], tx[1][c], - tx[2][c], tx[3][c]); + if (args->gather_only) { + for (c = 0; c < TGSI_QUAD_SIZE; c++) + rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c, + args->gather_comp, + tx); + } else { + /* interpolate R, G, B, A */ + for (c = 0; c < TGSI_QUAD_SIZE; c++) + rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, + tx[0][c], tx[1][c], + tx[2][c], tx[3][c]); + } } @@ -1610,11 +1690,18 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview, tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id); } - /* interpolate R, G, B, A */ - for (c = 0; c < TGSI_QUAD_SIZE; c++) - rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, - tx[0][c], tx[1][c], - tx[2][c], tx[3][c]); + if (args->gather_only) { + for (c = 0; c < TGSI_QUAD_SIZE; c++) + rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c, + args->gather_comp, + tx); + } else { + /* interpolate R, G, B, A */ + for (c = 0; c < TGSI_QUAD_SIZE; c++) + rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, + tx[0][c], tx[1][c], + tx[2][c], tx[3][c]); + } } @@ -1673,11 +1760,18 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview, tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id); } - /* interpolate R, G, B, A */ - for (c = 0; c < TGSI_QUAD_SIZE; c++) - rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, - tx[0][c], tx[1][c], - tx[2][c], tx[3][c]); + if (args->gather_only) { + for (c = 0; c < TGSI_QUAD_SIZE; c++) + rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c, + args->gather_comp, + tx); + } else { + /* interpolate R, G, B, A */ + for (c = 0; c < TGSI_QUAD_SIZE; c++) + rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, + tx[0][c], tx[1][c], + tx[2][c], tx[3][c]); + } } static void @@ -1795,6 +1889,7 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview, switch (control) { case tgsi_sampler_lod_none: + case tgsi_sampler_gather: /* 
XXX FIXME */ case tgsi_sampler_derivs_explicit: lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias; @@ -1822,6 +1917,12 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview, } } +static INLINE unsigned +get_gather_component(const float lod_in[TGSI_QUAD_SIZE]) +{ + /* gather component is stored in lod_in slot as unsigned */ + return (*(unsigned int *)lod_in) & 0x3; +} static void mip_filter_linear(struct sp_sampler_view *sp_sview, @@ -1844,6 +1945,8 @@ mip_filter_linear(struct sp_sampler_view *sp_sview, compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod); args.offset = filt_args->offset; + args.gather_only = filt_args->control == tgsi_sampler_gather; + args.gather_comp = get_gather_component(lod_in); for (j = 0; j < TGSI_QUAD_SIZE; j++) { int level0 = psview->u.tex.first_level + (int)lod[j]; @@ -1907,6 +2010,9 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview, struct img_filter_args args; args.offset = filt_args->offset; + args.gather_only = filt_args->control == tgsi_sampler_gather; + args.gather_comp = get_gather_component(lod_in); + compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod); for (j = 0; j < TGSI_QUAD_SIZE; j++) { @@ -1950,6 +2056,7 @@ mip_filter_none(struct sp_sampler_view *sp_sview, args.level = sp_sview->base.u.tex.first_level; args.offset = filt_args->offset; + args.gather_only = filt_args->control == tgsi_sampler_gather; compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod); @@ -1985,6 +2092,7 @@ mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview, struct img_filter_args args; args.level = sp_sview->base.u.tex.first_level; args.offset = filt_args->offset; + args.gather_only = filt_args->control == tgsi_sampler_gather; for (j = 0; j < TGSI_QUAD_SIZE; j++) { args.s = s[j]; args.t = t[j]; @@ -2364,6 +2472,7 @@ mip_filter_linear_2d_linear_repeat_POT( args.t = t[j]; args.p = p[j]; args.face_id = sp_sview->faces[j]; + args.gather_only = 
filt_args->control == tgsi_sampler_gather; if ((unsigned)level0 >= psview->u.tex.last_level) { if (level0 < 0) args.level = psview->u.tex.first_level; @@ -2409,11 +2518,12 @@ sample_compare(struct sp_sampler_view *sp_sview, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { const struct pipe_sampler_state *sampler = &sp_samp->base; - int j; - int k[4]; + int j, v; + int k[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; float pc[4]; const struct util_format_description *format_desc; unsigned chan_type; + bool is_gather = (control == tgsi_sampler_gather); /** * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' @@ -2457,65 +2567,74 @@ sample_compare(struct sp_sampler_view *sp_sview, pc[3] = CLAMP(pc[3], 0.0F, 1.0F); } - /* compare four texcoords vs. four texture samples */ - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k[0] = pc[0] < rgba[0][0]; - k[1] = pc[1] < rgba[0][1]; - k[2] = pc[2] < rgba[0][2]; - k[3] = pc[3] < rgba[0][3]; - break; - case PIPE_FUNC_LEQUAL: - k[0] = pc[0] <= rgba[0][0]; - k[1] = pc[1] <= rgba[0][1]; - k[2] = pc[2] <= rgba[0][2]; - k[3] = pc[3] <= rgba[0][3]; - break; - case PIPE_FUNC_GREATER: - k[0] = pc[0] > rgba[0][0]; - k[1] = pc[1] > rgba[0][1]; - k[2] = pc[2] > rgba[0][2]; - k[3] = pc[3] > rgba[0][3]; - break; - case PIPE_FUNC_GEQUAL: - k[0] = pc[0] >= rgba[0][0]; - k[1] = pc[1] >= rgba[0][1]; - k[2] = pc[2] >= rgba[0][2]; - k[3] = pc[3] >= rgba[0][3]; - break; - case PIPE_FUNC_EQUAL: - k[0] = pc[0] == rgba[0][0]; - k[1] = pc[1] == rgba[0][1]; - k[2] = pc[2] == rgba[0][2]; - k[3] = pc[3] == rgba[0][3]; - break; - case PIPE_FUNC_NOTEQUAL: - k[0] = pc[0] != rgba[0][0]; - k[1] = pc[1] != rgba[0][1]; - k[2] = pc[2] != rgba[0][2]; - k[3] = pc[3] != rgba[0][3]; - break; - case PIPE_FUNC_ALWAYS: - k[0] = k[1] = k[2] = k[3] = 1; - break; - case PIPE_FUNC_NEVER: - k[0] = k[1] = k[2] = k[3] = 0; - break; - default: - k[0] = k[1] = k[2] = k[3] = 0; - assert(0); - break; + for (v = 0; v < (is_gather ? 
TGSI_NUM_CHANNELS : 1); v++) { + /* compare four texcoords vs. four texture samples */ + switch (sampler->compare_func) { + case PIPE_FUNC_LESS: + k[v][0] = pc[0] < rgba[v][0]; + k[v][1] = pc[1] < rgba[v][1]; + k[v][2] = pc[2] < rgba[v][2]; + k[v][3] = pc[3] < rgba[v][3]; + break; + case PIPE_FUNC_LEQUAL: + k[v][0] = pc[0] <= rgba[v][0]; + k[v][1] = pc[1] <= rgba[v][1]; + k[v][2] = pc[2] <= rgba[v][2]; + k[v][3] = pc[3] <= rgba[v][3]; + break; + case PIPE_FUNC_GREATER: + k[v][0] = pc[0] > rgba[v][0]; + k[v][1] = pc[1] > rgba[v][1]; + k[v][2] = pc[2] > rgba[v][2]; + k[v][3] = pc[3] > rgba[v][3]; + break; + case PIPE_FUNC_GEQUAL: + k[v][0] = pc[0] >= rgba[v][0]; + k[v][1] = pc[1] >= rgba[v][1]; + k[v][2] = pc[2] >= rgba[v][2]; + k[v][3] = pc[3] >= rgba[v][3]; + break; + case PIPE_FUNC_EQUAL: + k[v][0] = pc[0] == rgba[v][0]; + k[v][1] = pc[1] == rgba[v][1]; + k[v][2] = pc[2] == rgba[v][2]; + k[v][3] = pc[3] == rgba[v][3]; + break; + case PIPE_FUNC_NOTEQUAL: + k[v][0] = pc[0] != rgba[v][0]; + k[v][1] = pc[1] != rgba[v][1]; + k[v][2] = pc[2] != rgba[v][2]; + k[v][3] = pc[3] != rgba[v][3]; + break; + case PIPE_FUNC_ALWAYS: + k[v][0] = k[v][1] = k[v][2] = k[v][3] = 1; + break; + case PIPE_FUNC_NEVER: + k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0; + break; + default: + k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0; + assert(0); + break; + } } - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - rgba[0][j] = k[j]; - rgba[1][j] = k[j]; - rgba[2][j] = k[j]; - rgba[3][j] = 1.0F; + if (is_gather) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + for (v = 0; v < TGSI_NUM_CHANNELS; v++) { + rgba[v][j] = k[v][j]; + } + } + } else { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba[0][j] = k[0][j]; + rgba[1][j] = k[0][j]; + rgba[2][j] = k[0][j]; + rgba[3][j] = 1.0F; + } } } - static void do_swizzling(const struct pipe_sampler_view *sview, float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], @@ -2693,7 +2812,7 @@ any_swizzle(const struct pipe_sampler_view *view) static img_filter_func get_img_filter(const struct 
sp_sampler_view *sp_sview, const struct pipe_sampler_state *sampler, - unsigned filter) + unsigned filter, bool gather) { switch (sp_sview->base.target) { case PIPE_BUFFER: @@ -2713,7 +2832,7 @@ get_img_filter(const struct sp_sampler_view *sp_sview, case PIPE_TEXTURE_RECT: /* Try for fast path: */ - if (sp_sview->pot2d && + if (!gather && sp_sview->pot2d && sampler->wrap_s == sampler->wrap_t && sampler->normalized_coords) { @@ -2790,17 +2909,20 @@ sample_mip(struct sp_sampler_view *sp_sview, img_filter_func min_img_filter = NULL; img_filter_func mag_img_filter = NULL; - if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) { + if (filt_args->control == tgsi_sampler_gather) { + mip_filter = mip_filter_nearest; + min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true); + } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) { mip_filter = mip_filter_linear_2d_linear_repeat_POT; } else { mip_filter = sp_samp->mip_filter; - min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter); + min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false); if (sp_samp->min_mag_equal) { mag_img_filter = min_img_filter; } else { - mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter); + mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false); } } @@ -2811,7 +2933,7 @@ sample_mip(struct sp_sampler_view *sp_sview, sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba); } - if (sp_sview->need_swizzle) { + if (sp_sview->need_swizzle && filt_args->control != tgsi_sampler_gather) { float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; memcpy(rgba_temp, rgba, sizeof(rgba_temp)); do_swizzling(&sp_sview->base, rgba_temp, rgba); diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index c9de56c5ab4..7d1aafc4473 100644 --- 
a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -60,6 +60,8 @@ struct img_filter_args { unsigned level; unsigned face_id; const int8_t *offset; + bool gather_only; + int gather_comp; }; typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview, From 1b052906763a36465e384366b875235b962ac143 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 20 May 2015 12:36:14 +1000 Subject: [PATCH 210/834] GL3.txt: update softpipe ARB_gpu_shader5 status texture gather and it already supported the new instructions. Signed-off-by: Dave Airlie --- docs/GL3.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index c7009308aba..9d56ee5d67e 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -102,8 +102,8 @@ GL 4.0, GLSL 4.00: - Dynamically uniform UBO array indices DONE (r600) - Implicit signed -> unsigned conversions DONE - Fused multiply-add DONE () - - Packing/bitfield/conversion functions DONE (r600, radeonsi) - - Enhanced textureGather DONE (r600, radeonsi) + - Packing/bitfield/conversion functions DONE (r600, radeonsi, softpipe) + - Enhanced textureGather DONE (r600, radeonsi, softpipe) - Geometry shader instancing DONE (r600) - Geometry shader multiple streams DONE () - Enhanced per-sample shading DONE (r600, radeonsi) @@ -115,7 +115,7 @@ GL 4.0, GLSL 4.00: GL_ARB_tessellation_shader started (Chris, Ilia) GL_ARB_texture_buffer_object_rgb32 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_texture_cube_map_array DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) - GL_ARB_texture_gather DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe) + GL_ARB_texture_gather DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_texture_query_lod DONE (i965, nv50, nvc0, r600, radeonsi) GL_ARB_transform_feedback2 DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_transform_feedback3 DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) 
From 4ee69a97bb0af0cc216539c48b246ea2abf8f208 Mon Sep 17 00:00:00 2001 From: Samuel Iglesias Gonsalvez Date: Wed, 13 May 2015 12:18:31 +0200 Subject: [PATCH 211/834] mesa/main: validate name syntax for array variables only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From ARB_program_interface_query: "Note that if an interface enumerates a single active resource list entry for an array variable (e.g., "a[0]"), a <name> identifying any array element other than the first (e.g., "a[1]") is not considered to match." It doesn't apply to arrays of interface blocks but just to array variables. Signed-off-by: Samuel Iglesias Gonsalvez Reviewed-by: Tapani Pälli --- src/mesa/main/program_resource.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c index b15a13210c0..d857b84e60d 100644 --- a/src/mesa/main/program_resource.c +++ b/src/mesa/main/program_resource.c @@ -220,12 +220,12 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface, case GL_PROGRAM_INPUT: case GL_PROGRAM_OUTPUT: case GL_UNIFORM: - case GL_UNIFORM_BLOCK: case GL_TRANSFORM_FEEDBACK_VARYING: - /* Validate name syntax for arrays. */ + /* Validate name syntax for array variables */ if (!valid_program_resource_index_name(name)) return GL_INVALID_INDEX; - + /* fall-through */ + case GL_UNIFORM_BLOCK: res = _mesa_program_resource_find_name(shProg, programInterface, name); if (!res) return GL_INVALID_INDEX; From e4201bb618f02a279fda59a1c528d7218e6900a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 18 May 2015 11:26:10 +0200 Subject: [PATCH 212/834] cso: add context cleanup code from st/mesa This fixes a crash in nouveau which can't handle set_constant_buffer(PIPE_SHADER_TESS_*). 
Cc: 10.6 Reviewed-by: Samuel Pitoiset Reviewed-by: Tobias Klausmann Reviewed-by: Ilia Mirkin --- src/gallium/auxiliary/cso_cache/cso_context.c | 7 +++++++ src/mesa/state_tracker/st_context.c | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 59bad2cb2d7..744b00cbd92 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -300,6 +300,8 @@ void cso_destroy_context( struct cso_context *ctx ) unsigned i, shader; if (ctx->pipe) { + ctx->pipe->set_index_buffer(ctx->pipe, NULL); + ctx->pipe->bind_blend_state( ctx->pipe, NULL ); ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); @@ -326,13 +328,18 @@ void cso_destroy_context( struct cso_context *ctx ) ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL ); ctx->pipe->bind_fs_state( ctx->pipe, NULL ); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, NULL); ctx->pipe->bind_vs_state( ctx->pipe, NULL ); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, NULL); if (ctx->has_geometry_shader) { ctx->pipe->bind_gs_state(ctx->pipe, NULL); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_GEOMETRY, 0, NULL); } if (ctx->has_tessellation) { ctx->pipe->bind_tcs_state(ctx->pipe, NULL); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_CTRL, 0, NULL); ctx->pipe->bind_tes_state(ctx->pipe, NULL); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_EVAL, 0, NULL); } ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL ); diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 69e0f929db8..ed9ed0f1b6c 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -376,12 +376,6 @@ void st_destroy_context( struct st_context *st ) } pipe_surface_reference(&st->state.framebuffer.zsbuf, NULL); - pipe->set_index_buffer(pipe, NULL); 
- - for (i = 0; i < PIPE_SHADER_TYPES; i++) { - pipe->set_constant_buffer(pipe, i, 0, NULL); - } - _mesa_delete_program_cache(st->ctx, st->pixel_xfer.cache); _vbo_DestroyContext(st->ctx); From e1c4e8aaaafddd0e04cf2a16e28ef8f1e09d8b44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 17 May 2015 16:35:14 +0200 Subject: [PATCH 213/834] gallium: remove TGSI_SAT_MINUS_PLUS_ONE It's a remnant of some old NV extension. Unused. I also have a patch that removes predicates if anyone is interested. Reviewed-by: Roland Scheidegger --- .../auxiliary/gallivm/lp_bld_tgsi_aos.c | 16 +----- .../auxiliary/gallivm/lp_bld_tgsi_soa.c | 21 +------- src/gallium/auxiliary/nir/tgsi_to_nir.c | 1 - src/gallium/auxiliary/tgsi/tgsi_build.c | 4 +- src/gallium/auxiliary/tgsi/tgsi_dump.c | 11 +--- src/gallium/auxiliary/tgsi/tgsi_exec.c | 51 +++---------------- src/gallium/auxiliary/tgsi/tgsi_lowering.c | 3 +- src/gallium/auxiliary/tgsi/tgsi_text.c | 12 ++--- .../drivers/freedreno/a2xx/fd2_compiler.c | 22 ++------ .../drivers/freedreno/ir3/ir3_compiler.c | 8 +-- src/gallium/drivers/i915/i915_fpc_optimize.c | 4 +- src/gallium/drivers/i915/i915_fpc_translate.c | 2 +- src/gallium/drivers/ilo/shader/toy_tgsi.c | 3 -- .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 13 +---- .../drivers/nouveau/nv30/nvfx_fragprog.c | 2 +- .../drivers/nouveau/nv30/nvfx_vertprog.c | 4 +- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 8 +-- .../drivers/radeon/radeon_setup_tgsi_llvm.c | 40 +++++++-------- src/gallium/drivers/svga/svga_tgsi_insn.c | 2 +- src/gallium/include/pipe/p_shader_tokens.h | 8 +-- 20 files changed, 47 insertions(+), 188 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index 738d5e9fd64..610283d7912 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -232,23 +232,9 @@ lp_emit_store_aos( /* * Saturate the value */ - - switch 
(inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - break; - - case TGSI_SAT_ZERO_ONE: + if (inst->Instruction.Saturate) { value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero); value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0)); - value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); - break; - - default: - assert(0); } /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 448c99d3547..092bd18b361 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1670,30 +1670,11 @@ emit_store_chan( * * It is always assumed to be float. */ - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - - case TGSI_SAT_ZERO_ONE: + if (inst->Instruction.Saturate) { assert(dtype == TGSI_TYPE_FLOAT || dtype == TGSI_TYPE_UNTYPED); value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); value = lp_build_clamp_zero_one_nanzero(float_bld, value); - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - assert(dtype == TGSI_TYPE_FLOAT || - dtype == TGSI_TYPE_UNTYPED); - value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); - /* This will give -1.0 for NaN which is probably not what we want. 
*/ - value = lp_build_max_ext(float_bld, value, - lp_build_const_vec(gallivm, float_bld->type, -1.0), - GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); - value = lp_build_min(float_bld, value, float_bld->one); - break; - - default: - assert(0); } if (reg->Register.Indirect) { diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 59aaf677888..50ce3dc366e 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1625,7 +1625,6 @@ ttn_emit_instruction(struct ttn_compile *c) } if (tgsi_inst->Instruction.Saturate) { - assert(tgsi_inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); assert(!dest.dest.is_ssa); ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest))); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 39a4296a3a1..fdb7febf7ea 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -610,7 +610,7 @@ tgsi_default_instruction( void ) instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION; instruction.NrTokens = 0; instruction.Opcode = TGSI_OPCODE_MOV; - instruction.Saturate = TGSI_SAT_NONE; + instruction.Saturate = 0; instruction.Predicate = 0; instruction.NumDstRegs = 1; instruction.NumSrcRegs = 1; @@ -632,7 +632,7 @@ tgsi_build_instruction(unsigned opcode, struct tgsi_instruction instruction; assert (opcode <= TGSI_OPCODE_LAST); - assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE); + assert (saturate <= 1); assert (num_dst_regs <= 3); assert (num_src_regs <= 15); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 27d410853bf..c584c2b0001 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -539,17 +539,8 @@ iter_instruction( TXT( info->mnemonic ); - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: + if (inst->Instruction.Saturate) { TXT( "_SAT" ); - 
break; - case TGSI_SAT_MINUS_PLUS_ONE: - TXT( "_SATNV" ); - break; - default: - assert( 0 ); } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 6512e80ba2e..a098a82be63 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1765,14 +1765,12 @@ store_dest(struct tgsi_exec_machine *mach, if (!dst) return; - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: + if (!inst->Instruction.Saturate) { for (i = 0; i < TGSI_QUAD_SIZE; i++) if (execmask & (1 << i)) dst->i[i] = chan->i[i]; - break; - - case TGSI_SAT_ZERO_ONE: + } + else { for (i = 0; i < TGSI_QUAD_SIZE; i++) if (execmask & (1 << i)) { if (chan->f[i] < 0.0f) @@ -1782,22 +1780,6 @@ store_dest(struct tgsi_exec_machine *mach, else dst->i[i] = chan->i[i]; } - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - for (i = 0; i < TGSI_QUAD_SIZE; i++) - if (execmask & (1 << i)) { - if (chan->f[i] < -1.0f) - dst->f[i] = -1.0f; - else if (chan->f[i] > 1.0f) - dst->f[i] = 1.0f; - else - dst->i[i] = chan->i[i]; - } - break; - - default: - assert( 0 ); } } @@ -3317,16 +3299,14 @@ store_double_channel(struct tgsi_exec_machine *mach, union tgsi_double_channel temp; const uint execmask = mach->ExecMask; - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: + if (!inst->Instruction.Saturate) { for (i = 0; i < TGSI_QUAD_SIZE; i++) if (execmask & (1 << i)) { dst[0].u[i] = chan->u[i][0]; dst[1].u[i] = chan->u[i][1]; } - break; - - case TGSI_SAT_ZERO_ONE: + } + else { for (i = 0; i < TGSI_QUAD_SIZE; i++) if (execmask & (1 << i)) { if (chan->d[i] < 0.0) @@ -3339,25 +3319,6 @@ store_double_channel(struct tgsi_exec_machine *mach, dst[0].u[i] = temp.u[i][0]; dst[1].u[i] = temp.u[i][1]; } - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - for (i = 0; i < TGSI_QUAD_SIZE; i++) - if (execmask & (1 << i)) { - if (chan->d[i] < -1.0) - temp.d[i] = -1.0; - else if (chan->d[i] > 1.0) - temp.d[i] = 
1.0; - else - temp.d[i] = chan->d[i]; - - dst[0].u[i] = temp.u[i][0]; - dst[1].u[i] = temp.u[i][1]; - } - break; - - default: - assert( 0 ); } store_dest_double(mach, &dst[0], reg, inst, chan_0, TGSI_EXEC_DATA_UINT); diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c index 4954c1178e5..a3b90bdb509 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ -1133,8 +1133,7 @@ transform_samp(struct tgsi_transform_context *tctx, /* MOV_SAT tmpA., tmpA */ if (mask) { - create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, - TGSI_SAT_ZERO_ONE); + create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1); } /* modify the texture samp instruction to take fixed up coord: */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index b6b3585d561..a9734db6355 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -903,7 +903,7 @@ match_inst(const char **pcur, /* simple case: the whole string matches the instruction name */ if (str_match_nocase_whole(&cur, info->mnemonic)) { *pcur = cur; - *saturate = TGSI_SAT_NONE; + *saturate = 0; return TRUE; } @@ -911,13 +911,7 @@ match_inst(const char **pcur, /* the instruction has a suffix, figure it out */ if (str_match_nocase_whole(&cur, "_SAT")) { *pcur = cur; - *saturate = TGSI_SAT_ZERO_ONE; - return TRUE; - } - - if (str_match_nocase_whole(&cur, "_SATNV")) { - *pcur = cur; - *saturate = TGSI_SAT_MINUS_PLUS_ONE; + *saturate = 1; return TRUE; } } @@ -931,7 +925,7 @@ parse_instruction( boolean has_label ) { uint i; - uint saturate = TGSI_SAT_NONE; + uint saturate = 0; const struct tgsi_opcode_info *info; struct tgsi_full_instruction inst; const char *cur; diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c b/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c index e4acc7e95b4..b48fb4659cd 100644 --- 
a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c @@ -414,32 +414,16 @@ add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, static void add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) { - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: + if (inst->Instruction.Saturate) { alu->alu.vector_clamp = true; - break; - case TGSI_SAT_MINUS_PLUS_ONE: - DBG("unsupported saturate"); - assert(0); - break; } } static void add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) { - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: + if (inst->Instruction.Saturate) { alu->alu.scalar_clamp = true; - break; - case TGSI_SAT_MINUS_PLUS_ONE: - DBG("unsupported saturate"); - assert(0); - break; } } @@ -758,7 +742,7 @@ translate_tex(struct fd2_compile_context *ctx, struct tgsi_src_register tmp_src; const struct tgsi_src_register *coord; bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) || - (inst->Instruction.Saturate != TGSI_SAT_NONE); + inst->Instruction.Saturate; int idx; if (using_temp || (opc == TGSI_OPCODE_TXP)) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index 43f4c955ac0..ad0340032e4 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -3487,15 +3487,9 @@ compile_instructions(struct ir3_compile_context *ctx) tgsi_get_opcode_name(opc)); } - switch (inst->Instruction.Saturate) { - case TGSI_SAT_ZERO_ONE: + if (inst->Instruction.Saturate) { create_clamp_imm(ctx, &inst->Dst[0].Register, fui(0.0), fui(1.0)); - break; - case TGSI_SAT_MINUS_PLUS_ONE: - create_clamp_imm(ctx, &inst->Dst[0].Register, - fui(-1.0), fui(1.0)); - break; } instr_finish(ctx); diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c 
b/src/gallium/drivers/i915/i915_fpc_optimize.c index e0134a7c4ee..83bb64918d4 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -552,7 +552,7 @@ static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current) if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && op_has_dst(current.FullInstruction.Instruction.Opcode) && - current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && + !current.FullInstruction.Instruction.Saturate && current.FullInstruction.Src[0].Register.Absolute == 0 && current.FullInstruction.Src[0].Register.Negate == 0 && is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) && @@ -582,7 +582,7 @@ static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_contex next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && op_has_dst(current->FullInstruction.Instruction.Opcode) && - next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && + !next->FullInstruction.Instruction.Saturate && next->FullInstruction.Src[0].Register.Absolute == 0 && next->FullInstruction.Src[0].Register.Negate == 0 && unused_from(ctx, &current->FullInstruction.Dst[0], index) && diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index b74f8239bb4..38a33888166 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -329,7 +329,7 @@ get_result_flags(const struct i915_full_instruction *inst) = inst->Dst[0].Register.WriteMask; uint flags = 0x0; - if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) + if (inst->Instruction.Saturate) flags |= A0_DEST_SATURATE; if (writeMask & TGSI_WRITEMASK_X) diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c index 65e47bf3a4a..d38585f1475
100644 --- a/src/gallium/drivers/ilo/shader/toy_tgsi.c +++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c @@ -2036,9 +2036,6 @@ parse_instruction(struct toy_tgsi *tgsi, if (!dst_is_scratch[i]) continue; - if (tgsi_inst->Instruction.Saturate == TGSI_SAT_MINUS_PLUS_ONE) - tc_fail(tgsi->tc, "TGSI_SAT_MINUS_PLUS_ONE unhandled"); - tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate; /* emit indirect store */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 6f7f397609b..2dcadeed44d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1604,19 +1604,8 @@ Converter::storeDst(int d, int c, Value *val) { const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); - switch (tgsi.getSaturate()) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: + if (tgsi.getSaturate()) { mkOp1(OP_SAT, dstTy, val, val); - break; - case TGSI_SAT_MINUS_PLUS_ONE: - mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f)); - mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f)); - break; - default: - assert(!"invalid saturation mode"); - break; } Value *ptr = NULL; diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c index 9889c4e5f40..9ef16965f39 100644 --- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c +++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c @@ -531,7 +531,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc, dst = tgsi_dst(fpc, &finst->Dst[0]); mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + sat = finst->Instruction.Saturate; switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c index 29d506b6e9b..c8960db4c5b 100644 --- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c 
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c @@ -539,7 +539,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc, final_dst = dst = tgsi_dst(vpc, &finst->Dst[0]); mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) { + if(finst->Instruction.Saturate) { assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL); if (vpc->is_nv4x) sat = TRUE; @@ -796,7 +796,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc, return FALSE; } - if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE && !vpc->is_nv4x) { + if(finst->Instruction.Saturate && !vpc->is_nv4x) { if (!vpc->r_0_1.type) vpc->r_0_1 = constant(vpc, -1, 0, 1, 0, 0); nvfx_vp_emit(vpc, arith(0, VEC, MAX, dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), X, X, X, X), none)); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 69afb4caeaa..23ed2cf2532 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -133,13 +133,7 @@ static unsigned translate_opcode(unsigned opcode) static unsigned translate_saturate(unsigned saturate) { - switch(saturate) { - default: - fprintf(stderr, "Unknown saturate mode: %i\n", saturate); - /* fall-through */ - case TGSI_SAT_NONE: return RC_SATURATE_NONE; - case TGSI_SAT_ZERO_ONE: return RC_SATURATE_ZERO_ONE; - } + return saturate ? 
RC_SATURATE_ZERO_ONE : RC_SATURATE_NONE; } static unsigned translate_register_file(unsigned file) diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 20e506b7c5e..86385375176 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -314,6 +314,21 @@ static void emit_declaration( } } +static LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base, + LLVMValueRef value) +{ + struct lp_build_emit_data clamp_emit_data; + + memset(&clamp_emit_data, 0, sizeof(clamp_emit_data)); + clamp_emit_data.arg_count = 3; + clamp_emit_data.args[0] = value; + clamp_emit_data.args[2] = bld_base->base.one; + clamp_emit_data.args[1] = bld_base->base.zero; + + return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP, + &clamp_emit_data); +} + static void emit_store( struct lp_build_tgsi_context * bld_base, @@ -324,7 +339,6 @@ emit_store( struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); struct gallivm_state *gallivm = bld->bld_base.base.gallivm; - struct lp_build_context base = bld->bld_base.base; const struct tgsi_full_dst_register *reg = &inst->Dst[0]; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; LLVMValueRef temp_ptr; @@ -350,28 +364,8 @@ emit_store( TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { LLVMValueRef value = dst[chan_index]; - if (inst->Instruction.Saturate != TGSI_SAT_NONE) { - struct lp_build_emit_data clamp_emit_data; - - memset(&clamp_emit_data, 0, sizeof(clamp_emit_data)); - clamp_emit_data.arg_count = 3; - clamp_emit_data.args[0] = value; - clamp_emit_data.args[2] = base.one; - - switch(inst->Instruction.Saturate) { - case TGSI_SAT_ZERO_ONE: - clamp_emit_data.args[1] = base.zero; - break; - case TGSI_SAT_MINUS_PLUS_ONE: - clamp_emit_data.args[1] = LLVMConstReal( - base.elem_type, -1.0f); - break; - default: - 
assert(0); - } - value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP, - &clamp_emit_data); - } + if (inst->Instruction.Saturate) + value = radeon_llvm_saturate(bld_base, value); if (reg->Register.File == TGSI_FILE_ADDRESS) { temp_ptr = bld->addr[reg->Register.Index][chan_index]; diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 7a12b52e2dd..bac956066a5 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -1900,7 +1900,7 @@ emit_tex(struct svga_shader_emitter *emit, emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE || emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA); - boolean saturate = insn->Instruction.Saturate != TGSI_SAT_NONE; + boolean saturate = insn->Instruction.Saturate; /* If doing compare processing or tex swizzle or saturation, we need to put * the fetched color into a temporary so it can be used as a source later on. diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index ff1f7d6d21a..953bdf6fbbe 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -538,10 +538,6 @@ struct tgsi_property_data { #define TGSI_OPCODE_DSSG 222 #define TGSI_OPCODE_LAST 223 -#define TGSI_SAT_NONE 0 /* do not saturate */ -#define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ -#define TGSI_SAT_MINUS_PLUS_ONE 2 /* clamp to [-1,1] */ - /** * Opcode is the operation code to execute. 
A given operation defines the * semantics how the source registers (if any) are interpreted and what is @@ -561,13 +557,13 @@ struct tgsi_instruction unsigned Type : 4; /* TGSI_TOKEN_TYPE_INSTRUCTION */ unsigned NrTokens : 8; /* UINT */ unsigned Opcode : 8; /* TGSI_OPCODE_ */ - unsigned Saturate : 2; /* TGSI_SAT_ */ + unsigned Saturate : 1; /* BOOL */ unsigned NumDstRegs : 2; /* UINT */ unsigned NumSrcRegs : 4; /* UINT */ unsigned Predicate : 1; /* BOOL */ unsigned Label : 1; unsigned Texture : 1; - unsigned Padding : 1; + unsigned Padding : 2; }; /* From 2126c68e5cba79709e228f12eb3062a9be634a0e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 May 2015 16:57:43 -0700 Subject: [PATCH 214/834] nir: Get rid of the array elements parameter on load/store intrinsics Previously, we used intrinsic->const_index[1] to represent "the number of array elements to load" for load/store intrinsics. However, this is set to 1 by every pass that ever creates a load/store intrinsic. Also, while it might make some sense for registers, it makes no sense whatsoever in SSA. On top of that, the i965 backend was the only backend to ever support it; freedreno and vc4 just assert that it's always 1. Let's just delete it.
Signed-off-by: Jason Ekstrand Reviewed-by: Connor Abbott Reviewed-by: Rob Clark --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 - .../drivers/freedreno/ir3/ir3_compiler_nir.c | 5 -- src/gallium/drivers/vc4/vc4_program.c | 6 --- src/glsl/nir/nir_intrinsics.h | 19 ++++--- src/glsl/nir/nir_lower_io.c | 2 - src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 52 ++++++++----------- 6 files changed, 32 insertions(+), 54 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 50ce3dc366e..1702b41393b 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -401,7 +401,6 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, load->num_components = 4; load->const_index[0] = index; - load->const_index[1] = 1; if (dim) { if (dimind) { load->src[srcn] = @@ -1671,7 +1670,6 @@ ttn_add_output_stores(struct ttn_compile *c) nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); store->num_components = 4; store->const_index[0] = var->data.driver_location + i; - store->const_index[1] = 1; store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg; nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 05e7049ad55..2cf25ea6e0a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1158,14 +1158,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) switch (intr->intrinsic) { case nir_intrinsic_load_uniform: - compile_assert(ctx, intr->const_index[1] == 1); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; dst[i] = create_uniform(ctx, n); } break; case nir_intrinsic_load_uniform_indirect: - compile_assert(ctx, intr->const_index[1] == 1); src = get_src(ctx, &intr->src[0]); for (int i = 0; i < 
intr->num_components; i++) { unsigned n = idx * 4 + i; @@ -1178,14 +1176,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) emit_intrinsic_load_ubo(ctx, intr, dst); break; case nir_intrinsic_load_input: - compile_assert(ctx, intr->const_index[1] == 1); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; dst[i] = b->inputs[n]; } break; case nir_intrinsic_load_input_indirect: - compile_assert(ctx, intr->const_index[1] == 1); src = get_src(ctx, &intr->src[0]); struct ir3_instruction *collect = create_collect(b, b->inputs, b->ninputs); @@ -1202,7 +1198,6 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) emit_intrinisic_store_var(ctx, intr); break; case nir_intrinsic_store_output: - compile_assert(ctx, intr->const_index[1] == 1); src = get_src(ctx, &intr->src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index bf156f9b42d..d84e5f25616 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1849,8 +1849,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_uniform: - assert(instr->const_index[1] == 1); - for (int i = 0; i < instr->num_components; i++) { dest[i] = qir_uniform(c, QUNIFORM_UNIFORM, instr->const_index[0] * 4 + i); @@ -1858,8 +1856,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_load_uniform_indirect: - assert(instr->const_index[1] == 1); - for (int i = 0; i < instr->num_components; i++) { dest[i] = indirect_uniform_load(c, ntq_get_src(c, instr->src[0], 0), @@ -1870,8 +1866,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_load_input: - assert(instr->const_index[1] == 1); - for (int i = 0; i < instr->num_components; i++) dest[i] = 
c->inputs[instr->const_index[0] * 4 + i]; diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 10192c5315c..b516830be95 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -138,12 +138,11 @@ SYSTEM_VALUE(sample_mask_in, 1) SYSTEM_VALUE(invocation_id, 1) /* - * The first index is the address to load from, and the second index is the - * number of array elements to load. Indirect loads have an additional - * register input, which is added to the constant address to compute the - * final address to load from. For UBO's (and SSBO's), the first source is - * the (possibly constant) UBO buffer index and the indirect (if it exists) - * is the second source. + * The first and only index is the base address to load from. Indirect + * loads have an additional register input, which is added to the constant + * address to compute the final address to load from. For UBO's (and + * SSBO's), the first source is the (possibly constant) UBO buffer index + * and the indirect (if it exists) is the second source. * * For vector backends, the address is in terms of one vec4, and so each array * element is +4 scalar components from the previous array element. 
For scalar @@ -152,9 +151,9 @@ SYSTEM_VALUE(invocation_id, 1) */ #define LOAD(name, extra_srcs, flags) \ - INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \ + INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1, flags) \ INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ - true, 0, 0, 2, flags) + true, 0, 0, 1, flags) LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) @@ -172,7 +171,7 @@ LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \ num_indices, flags) \ -STORE(output, 2, 0) -/* STORE(ssbo, 3, 0) */ +STORE(output, 1, 0) +/* STORE(ssbo, 2, 0) */ LAST_INTRINSIC(store_output_indirect) diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 03eed04e1e9..6761d5bad33 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -288,7 +288,6 @@ nir_lower_io_block(nir_block *block, void *void_state) offset += intrin->variables[0]->var->data.driver_location; load->const_index[0] = offset; - load->const_index[1] = 1; if (has_indirect) load->src[0] = indirect; @@ -331,7 +330,6 @@ nir_lower_io_block(nir_block *block, void *void_state) offset += intrin->variables[0]->var->data.driver_location; store->const_index[0] = offset; - store->const_index[1] = 1; nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 5dd8363b91e..56a2278a2dc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1345,16 +1345,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) index -= num_direct_uniforms; } - for (int i = 0; i < instr->const_index[1]; i++) { - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(uniform_reg, dest.type), index); - if 
(has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); - index++; + for (unsigned j = 0; j < instr->num_components; j++) { + fs_reg src = offset(retype(uniform_reg, dest.type), index); + if (has_indirect) + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); + index++; - emit(MOV(dest, src)); - dest = offset(dest, 1); - } + emit(MOV(dest, src)); + dest = offset(dest, 1); } break; } @@ -1426,17 +1424,15 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) /* fallthrough */ case nir_intrinsic_load_input: { unsigned index = 0; - for (int i = 0; i < instr->const_index[1]; i++) { - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(nir_inputs, dest.type), - instr->const_index[0] + index); - if (has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); - index++; + for (unsigned j = 0; j < instr->num_components; j++) { + fs_reg src = offset(retype(nir_inputs, dest.type), + instr->const_index[0] + index); + if (has_indirect) + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); + index++; - emit(MOV(dest, src)); - dest = offset(dest, 1); - } + emit(MOV(dest, src)); + dest = offset(dest, 1); } break; } @@ -1564,16 +1560,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_store_output: { fs_reg src = get_nir_src(instr->src[0]); unsigned index = 0; - for (int i = 0; i < instr->const_index[1]; i++) { - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg new_dest = offset(retype(nir_outputs, src.type), - instr->const_index[0] + index); - if (has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1])); - index++; - emit(MOV(new_dest, src)); - src = offset(src, 1); - } + for (unsigned j = 0; j < instr->num_components; j++) { + fs_reg new_dest = offset(retype(nir_outputs, src.type), + instr->const_index[0] + index); + if (has_indirect) + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1])); + index++; 
+ emit(MOV(new_dest, src)); + src = offset(src, 1); } break; } From 31cd2d75dc3844e40143f649fe383de17c152a13 Mon Sep 17 00:00:00 2001 From: Alan Coopersmith Date: Fri, 15 May 2015 19:05:45 -0700 Subject: [PATCH 215/834] swrast: Build fix for Solaris Fixes regression from commit 5b2d3480f57168d50ad24cf0b8c9244414bd3701 Cc: "10.5 10.6" Signed-off-by: Alan Coopersmith Reviewed-by: Jeremy Huddleston Sequoia --- configure.ac | 1 + src/mesa/drivers/dri/swrast/swrast.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 095e23e69b6..5594548ddfd 100644 --- a/configure.ac +++ b/configure.ac @@ -649,6 +649,7 @@ if test "x$enable_asm" = xyes; then fi AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"]) +AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"]) AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"]) dnl Check to see if dlopen is in default libraries (like Solaris, which diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index cbc946c3ffd..2d4bb702fc2 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -62,7 +62,9 @@ #include "swrast/s_context.h" #include <sys/types.h> -#include <sys/sysctl.h> +#ifdef HAVE_SYS_SYSCTL_H +# include <sys/sysctl.h> +#endif const __DRIextension **__driDriverGetExtensions_swrast(void); From 06ff751f97fbeb62a23936cd8f9c54733920d082 Mon Sep 17 00:00:00 2001 From: Jeremy Huddleston Sequoia Date: Wed, 11 Feb 2015 02:32:33 -0800 Subject: [PATCH 216/834] darwin: Fix install name of libOSMesa Passing -module to glibtool causes the resulting library to be called libSomething.so rather than libSomething.dylib on darwin. Regardless if libOSMesa is a library or a module, it has been used as the former for quite some time. Update the build to reflect that and resolve the naming issue. Cc: "10.5 10.6" Signed-off-by: Jeremy Huddleston Sequoia [Emil Velikov: Tweak the commit message.]
Reviewed-by: Emil Velikov --- src/mesa/drivers/osmesa/Makefile.am | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/osmesa/Makefile.am b/src/mesa/drivers/osmesa/Makefile.am index 9a388d64cd5..46332e16bd1 100644 --- a/src/mesa/drivers/osmesa/Makefile.am +++ b/src/mesa/drivers/osmesa/Makefile.am @@ -39,7 +39,6 @@ nodist_EXTRA_lib@OSMESA_LIB@_la_SOURCES = dummy.cpp lib@OSMESA_LIB@_la_SOURCES = osmesa.c lib@OSMESA_LIB@_la_LDFLAGS = \ - -module \ -no-undefined \ -version-number @OSMESA_VERSION@ \ $(GC_SECTIONS) \ From 36438f0db6c7c696df73ced12684f4df9d2b47e5 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 20 May 2015 21:51:52 +0100 Subject: [PATCH 217/834] targets/osmesa: drop the -module tag from LDFLAGS Gallium equivalent of commit 06ff751f97f(darwin: Fix install name of libOSMesa) Cc: "10.5 10.6" Signed-off-by: Emil Velikov --- src/gallium/targets/osmesa/Makefile.am | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/targets/osmesa/Makefile.am b/src/gallium/targets/osmesa/Makefile.am index 2c09736714a..38e515f8252 100644 --- a/src/gallium/targets/osmesa/Makefile.am +++ b/src/gallium/targets/osmesa/Makefile.am @@ -42,7 +42,6 @@ nodist_EXTRA_lib@OSMESA_LIB@_la_SOURCES = dummy.cpp lib@OSMESA_LIB@_la_SOURCES = target.c lib@OSMESA_LIB@_la_LDFLAGS = \ - -module \ -no-undefined \ -version-number @OSMESA_VERSION@ \ $(GC_SECTIONS) \ From 3e7bc6728520b469ed53a2588ead28287f8b88f0 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 27 Apr 2015 23:47:40 -0400 Subject: [PATCH 218/834] freedreno/ir3: fix immediate usage in tgsi tex fe get_immediate will return a const reference, the requested immediate isn't necessarily in the x slot. Make sure to use the swizzle. 
Signed-off-by: Ilia Mirkin Cc: mesa-stable@lists.freedesktop.org --- src/gallium/drivers/freedreno/ir3/ir3_compiler.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index ad0340032e4..cc049d3fdfd 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -1615,7 +1615,7 @@ trans_samp(const struct instr_translater *t, instr->cat1.src_type = type_mov; instr->cat1.dst_type = type_mov; add_dst_reg(ctx, instr, &tmp_dst, i); - add_src_reg(ctx, instr, &zero, 0); + add_src_reg(ctx, instr, &zero, zero.SwizzleX); i++; } if (tgt->array) { @@ -1669,15 +1669,18 @@ trans_samp(const struct instr_translater *t, */ if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { while (collect->regs_count < 5) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0); + ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), + &zero, zero.SwizzleX); for (i = 0; i < tgt->dims; i++) ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdx, i); if (tgt->dims < 2) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0); + ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), + &zero, zero.SwizzleX); for (i = 0; i < tgt->dims; i++) ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdy, i); if (tgt->dims < 2) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0); + ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), + &zero, zero.SwizzleX); tinf.src_wrmask |= ((1 << (2 * MAX2(tgt->dims, 2))) - 1) << 4; } @@ -1700,7 +1703,8 @@ trans_samp(const struct instr_translater *t, ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), offset, i); if (tgt->dims < 2) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0); + ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), + &zero, zero.SwizzleX); } if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) ssa_src(ctx, 
ir3_reg_create(collect, 0, IR3_REG_SSA), From 6cdb29d52fc51e3d904b50bb7003c9fa38bb7896 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 20 May 2015 04:00:16 -0400 Subject: [PATCH 219/834] freedreno/a3xx: set .zw of sprite coords to .01 Fixes non-determinism in bin/point-sprite rendering, and the stars on the intro screen to neverball. Cc: "10.6" Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/a3xx/fd3_program.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index a6824ef92e7..57fcaa9020e 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -413,12 +413,15 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, } } - /* TODO: Figure out if there's a way to make it spit out 0's and - * 1's for the .z and .w components. + /* Replace the .xy coordinates with S/T from the point sprite. Set + * interpolation bits for .zw such that they become .01 */ - if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) + if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) { vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09) << ((inloc % 16) * 2); + vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2); + vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2); + } } OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); From 51ccdb63467b1e848db025670f126eccb051f8f2 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 19 May 2015 07:38:40 -0700 Subject: [PATCH 220/834] glsl: Use AM_V_GEN/AM_V_at in NIR rules. 
--- src/glsl/Makefile.am | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am index 23c6fe8bb6c..1b7b1f8c691 100644 --- a/src/glsl/Makefile.am +++ b/src/glsl/Makefile.am @@ -247,21 +247,21 @@ dist-hook: $(RM) glcpp/tests/subtest*/*.out nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py - $(MKDIR_P) nir; \ - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ + $(AM_V_at)$(MKDIR_P) nir + $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h - $(MKDIR_P) nir; \ - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@ + $(AM_V_at)$(MKDIR_P) nir + $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@ nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py - $(MKDIR_P) nir; \ - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@ + $(AM_V_at)$(MKDIR_P) nir + $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@ nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py - $(MKDIR_P) nir; \ - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@ + $(AM_V_at)$(MKDIR_P) nir + $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@ nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py - $(MKDIR_P) nir; \ - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@ + $(AM_V_at)$(MKDIR_P) nir + $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@ From d67515b7be1ebd9482970ac1867ee4e9bbbf96d5 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Thu, 30 Apr 2015 20:45:54 +1000 Subject: [PATCH 221/834] glsl: remove element_type() helper We now have is_array() and without_array() that make the code much clearer and remove the need for this. 
For all remaining calls to this we already knew that the type was an array so returning a null wasn't adding any value. v2: use without_array() in _mesa_ast_array_index_to_hir() and don't use without_array() in lower_clip_distance_visitor() as we want to make sure the array is 2D. Reviewed-by: Matt Turner --- src/glsl/ast_array_index.cpp | 2 +- src/glsl/ast_function.cpp | 8 ++++---- src/glsl/ast_to_hir.cpp | 9 ++++----- src/glsl/glsl_parser_extras.cpp | 4 ++-- src/glsl/glsl_types.cpp | 2 +- src/glsl/glsl_types.h | 14 +------------- src/glsl/ir.cpp | 4 ++-- src/glsl/link_atomics.cpp | 2 +- src/glsl/link_varyings.cpp | 2 +- src/glsl/linker.cpp | 4 ++-- src/glsl/lower_clip_distance.cpp | 8 ++++---- 11 files changed, 23 insertions(+), 36 deletions(-) diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp index ecef651f752..752d86f72fd 100644 --- a/src/glsl/ast_array_index.cpp +++ b/src/glsl/ast_array_index.cpp @@ -225,7 +225,7 @@ _mesa_ast_array_index_to_hir(void *mem_ctx, * values *do* diverge, then the behavior of the operation requiring a * dynamically uniform expression is undefined. 
*/ - if (array->type->element_type()->is_sampler()) { + if (array->type->without_array()->is_sampler()) { if (!state->is_version(130, 100)) { if (state->es_shader) { _mesa_glsl_warning(&loc, state, diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 758361324e3..1e77124bd15 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -863,7 +863,7 @@ process_array_constructor(exec_list *instructions, if (is_unsized_array) { constructor_type = - glsl_type::get_array_instance(constructor_type->element_type(), + glsl_type::get_array_instance(constructor_type->fields.array, parameter_count); assert(constructor_type != NULL); assert(constructor_type->length == parameter_count); @@ -876,7 +876,7 @@ process_array_constructor(exec_list *instructions, ir_rvalue *result = ir; const glsl_base_type element_base_type = - constructor_type->element_type()->base_type; + constructor_type->fields.array->base_type; /* Apply implicit conversions (not the scalar constructor rules!). See * the spec quote above. */ @@ -896,10 +896,10 @@ process_array_constructor(exec_list *instructions, } } - if (result->type != constructor_type->element_type()) { + if (result->type != constructor_type->fields.array) { _mesa_glsl_error(loc, state, "type error in array constructor: " "expected: %s, found %s", - constructor_type->element_type()->name, + constructor_type->fields.array->name, result->type->name); return ir_rvalue::error_value(ctx); } diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 14e63090557..8aebb1320f1 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -678,7 +678,7 @@ validate_assignment(struct _mesa_glsl_parse_state *state, * is handled by ir_dereference::is_lvalue. 
*/ if (lhs_type->is_unsized_array() && rhs->type->is_array() - && (lhs_type->element_type() == rhs->type->element_type())) { + && (lhs_type->fields.array == rhs->type->fields.array)) { if (is_initializer) { return rhs; } else { @@ -820,7 +820,7 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, var->data.max_array_access); } - var->type = glsl_type::get_array_instance(lhs->type->element_type(), + var->type = glsl_type::get_array_instance(lhs->type->fields.array, rhs->type->array_size()); d->type = var->type; } @@ -2330,8 +2330,7 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual, struct _mesa_glsl_parse_state *state, YYLTYPE *loc) { - const glsl_type *base_type = - (var->type->is_array() ? var->type->element_type() : var->type); + const glsl_type *base_type = var->type->without_array(); if (base_type->is_image()) { if (var->data.mode != ir_var_uniform && @@ -2855,7 +2854,7 @@ get_variable_being_redeclared(ir_variable *var, YYLTYPE loc, * type and specify a size." */ if (earlier->type->is_unsized_array() && var->type->is_array() - && (var->type->element_type() == earlier->type->element_type())) { + && (var->type->fields.array == earlier->type->fields.array)) { /* FINISHME: This doesn't match the qualifiers on the two * FINISHME: declarations. It's not 100% clear whether this is * FINISHME: required or not. diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index be6713c46a2..046d5d7b5bf 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -778,7 +778,7 @@ _mesa_ast_set_aggregate_type(const glsl_type *type, /* If the aggregate is an array, recursively set its elements' types. */ if (type->is_array()) { - /* Each array element has the type type->element_type(). + /* Each array element has the type type->fields.array. * * E.g., if if struct S[2] we want to set each element's type to * struct S. 
@@ -790,7 +790,7 @@ _mesa_ast_set_aggregate_type(const glsl_type *type, link); if (expr->oper == ast_aggregate) - _mesa_ast_set_aggregate_type(type->element_type(), expr); + _mesa_ast_set_aggregate_type(type->fields.array, expr); } /* If the aggregate is a struct, recursively set its fields' types. */ diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 9c9b7efcbc7..f675e90cb0d 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -217,7 +217,7 @@ glsl_type::contains_opaque() const { case GLSL_TYPE_ATOMIC_UINT: return true; case GLSL_TYPE_ARRAY: - return element_type()->contains_opaque(); + return fields.array->contains_opaque(); case GLSL_TYPE_STRUCT: for (unsigned int i = 0; i < length; i++) { if (fields.structure[i].type->contains_opaque()) diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 5645dcd5011..f54a9393e73 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -227,18 +227,6 @@ struct glsl_type { */ const glsl_type *get_scalar_type() const; - /** - * Query the type of elements in an array - * - * \return - * Pointer to the type of elements in the array for array types, or \c NULL - * for non-array types. - */ - const glsl_type *element_type() const - { - return is_array() ? 
fields.array : NULL; - } - /** * Get the instance of a built-in scalar, vector, or matrix type */ @@ -556,7 +544,7 @@ struct glsl_type { if (base_type == GLSL_TYPE_ATOMIC_UINT) return ATOMIC_COUNTER_SIZE; else if (is_array()) - return length * element_type()->atomic_size(); + return length * fields.array->atomic_size(); else return 0; } diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 9e3238552e9..dbd064feecc 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -912,7 +912,7 @@ ir_constant::zero(void *mem_ctx, const glsl_type *type) c->array_elements = ralloc_array(c, ir_constant *, type->length); for (unsigned i = 0; i < type->length; i++) - c->array_elements[i] = ir_constant::zero(c, type->element_type()); + c->array_elements[i] = ir_constant::zero(c, type->fields.array); } if (type->is_record()) { @@ -1341,7 +1341,7 @@ ir_dereference_array::set_array(ir_rvalue *value) const glsl_type *const vt = this->array->type; if (vt->is_array()) { - type = vt->element_type(); + type = vt->fields.array; } else if (vt->is_matrix()) { type = vt->column_type(); } else if (vt->is_vector()) { diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp index 603873a5d4f..100d03c4e8f 100644 --- a/src/glsl/link_atomics.cpp +++ b/src/glsl/link_atomics.cpp @@ -207,7 +207,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx, storage->atomic_buffer_index = i; storage->offset = var->data.atomic.offset; storage->array_stride = (var->type->is_array() ? - var->type->element_type()->atomic_size() : 0); + var->type->without_array()->atomic_size() : 0); } /* Assign stage-specific fields. 
*/ diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index 605748a9c2a..7b2d4bd2394 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -56,7 +56,7 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, const glsl_type *type_to_match = input->type; if (consumer_stage == MESA_SHADER_GEOMETRY) { assert(type_to_match->is_array()); /* Enforced by ast_to_hir */ - type_to_match = type_to_match->element_type(); + type_to_match = type_to_match->fields.array; } if (type_to_match != output->type) { /* There is a bit of a special case for gl_TexCoord. This diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index ecdc025710f..9798afefc98 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -224,7 +224,7 @@ public: return visit_continue; } - var->type = glsl_type::get_array_instance(var->type->element_type(), + var->type = glsl_type::get_array_instance(var->type->fields.array, this->num_vertices); var->data.max_array_access = this->num_vertices - 1; @@ -245,7 +245,7 @@ public: { const glsl_type *const vt = ir->array->type; if (vt->is_array()) - ir->type = vt->element_type(); + ir->type = vt->fields.array; return visit_continue; } }; diff --git a/src/glsl/lower_clip_distance.cpp b/src/glsl/lower_clip_distance.cpp index 2d6138d5abd..01f028b1f37 100644 --- a/src/glsl/lower_clip_distance.cpp +++ b/src/glsl/lower_clip_distance.cpp @@ -114,7 +114,7 @@ lower_clip_distance_visitor::visit(ir_variable *ir) return visit_continue; assert (ir->type->is_array()); - if (!ir->type->element_type()->is_array()) { + if (!ir->type->fields.array->is_array()) { /* 1D gl_ClipDistance (used for vertex and geometry output, and fragment * input). 
*/ @@ -123,7 +123,7 @@ lower_clip_distance_visitor::visit(ir_variable *ir) this->progress = true; this->old_clip_distance_1d_var = ir; - assert (ir->type->element_type() == glsl_type::float_type); + assert (ir->type->fields.array == glsl_type::float_type); unsigned new_size = (ir->type->array_size() + 3) / 4; /* Clone the old var so that we inherit all of its properties */ @@ -148,8 +148,8 @@ lower_clip_distance_visitor::visit(ir_variable *ir) this->progress = true; this->old_clip_distance_2d_var = ir; - assert (ir->type->element_type()->element_type() == glsl_type::float_type); - unsigned new_size = (ir->type->element_type()->array_size() + 3) / 4; + assert (ir->type->fields.array->fields.array == glsl_type::float_type); + unsigned new_size = (ir->type->fields.array->array_size() + 3) / 4; /* Clone the old var so that we inherit all of its properties */ this->new_clip_distance_2d_var = ir->clone(ralloc_parent(ir), NULL); From 7c1a00174b2bec102030b19b6094ebcab23fe04d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 May 2015 11:23:06 +1000 Subject: [PATCH 222/834] u_math: uses assert, include assert.h this fixes a build problem found on RHEL s390. not sure what configure options caused it, I couldn't get it on x86 here. 
Reviewed-by: Brian Paul Cc: "10.6" mesa-stable@lists.freedesktop.org Signed-off-by: Dave Airlie --- src/gallium/auxiliary/util/u_math.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 3d27a59e8c0..58070a9dafa 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -42,6 +42,7 @@ #include "pipe/p_compiler.h" #include "c99_math.h" +#include #include #include From 2b40c306d238e2e738d8901e10f351a109b02687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 26 Mar 2015 09:52:37 +0100 Subject: [PATCH 223/834] radeon/vce: move CPB handling function into common code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They are not firmware version dependent. Signed-off-by: Christian König --- src/gallium/drivers/radeon/radeon_vce.c | 38 +++++++++++++++++++ src/gallium/drivers/radeon/radeon_vce.h | 7 ++++ .../drivers/radeon/radeon_vce_40_2_2.c | 32 ++-------------- 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index e220f40165b..9913c8b4f1a 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -182,6 +182,44 @@ static unsigned get_cpb_num(struct rvce_encoder *enc) return MIN2(dpb / (w * h), 16); } +/** + * Get the slot for the currently encoded frame + */ +struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc) +{ + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); +} + +/** + * Get the slot for L0 + */ +struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc) +{ + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list); +} + +/** + * Get the slot for L1 + */ +struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc) +{ + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list); +} + +/** + * Calculate 
the offsets into the CPB + */ +void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, + unsigned *luma_offset, unsigned *chroma_offset) +{ + unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128); + unsigned vpitch = align(enc->luma->npix_y, 16); + unsigned fsize = pitch * (vpitch + vpitch / 2); + + *luma_offset = slot->index * fsize; + *chroma_offset = *luma_offset + pitch * vpitch; +} + /** * destroy this video encoder */ diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h index 1cf018006a8..9fcaecabac4 100644 --- a/src/gallium/drivers/radeon/radeon_vce.h +++ b/src/gallium/drivers/radeon/radeon_vce.h @@ -104,6 +104,13 @@ struct rvce_encoder { bool use_vui; }; +/* CPB handling functions */ +struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc); +struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc); +struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc); +void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, + unsigned *luma_offset, unsigned *chroma_offset); + struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, const struct pipe_video_codec *templat, struct radeon_winsys* ws, diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c index 09029575547..51b17b5f6a8 100644 --- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c +++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c @@ -46,32 +46,6 @@ static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 }; -static struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc) -{ - return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); -} - -static struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc) -{ - return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list); -} - -static struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc) -{ - return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list); 
-} - -static void frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, - unsigned *luma_offset, unsigned *chroma_offset) -{ - unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128); - unsigned vpitch = align(enc->luma->npix_y, 16); - unsigned fsize = pitch * (vpitch + vpitch / 2); - - *luma_offset = slot->index * fsize; - *chroma_offset = *luma_offset + pitch * vpitch; -} - static void session(struct rvce_encoder *enc) { RVCE_BEGIN(0x00000001); // session cmd @@ -369,7 +343,7 @@ static void encode(struct rvce_encoder *enc) if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { struct rvce_cpb_slot *l0 = l0_slot(enc); - frame_offset(enc, l0, &luma_offset, &chroma_offset); + rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset); RVCE_CS(l0->picture_type); // encPicType RVCE_CS(l0->frame_num); // frameNumber RVCE_CS(l0->pic_order_cnt); // pictureOrderCount @@ -395,7 +369,7 @@ static void encode(struct rvce_encoder *enc) RVCE_CS(0x00000000); // pictureStructure if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { struct rvce_cpb_slot *l1 = l1_slot(enc); - frame_offset(enc, l1, &luma_offset, &chroma_offset); + rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset); RVCE_CS(l1->picture_type); // encPicType RVCE_CS(l1->frame_num); // frameNumber RVCE_CS(l1->pic_order_cnt); // pictureOrderCount @@ -409,7 +383,7 @@ static void encode(struct rvce_encoder *enc) RVCE_CS(0xffffffff); // chromaOffset } - frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset); + rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset); RVCE_CS(luma_offset); // encReconstructedLumaOffset RVCE_CS(chroma_offset); // encReconstructedChromaOffset RVCE_CS(0x00000000); // encColocBufferOffset From 6921ea42a17c715c4b5b2d0092f9b9f4df42b10c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 26 Mar 2015 10:00:09 +0100 Subject: [PATCH 224/834] radeon/vce: adapt 
new firmware interface changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: make this also compatible with original released firmware v3 (chk): switch to original idea of separate files for fw versions Signed-off-by: Leo Liu Signed-off-by: Christian König Reviewed-by: Alex Deucher (v2) --- src/gallium/drivers/radeon/Makefile.sources | 1 + src/gallium/drivers/radeon/radeon_vce.c | 22 +- src/gallium/drivers/radeon/radeon_vce.h | 3 + src/gallium/drivers/radeon/radeon_vce_50.c | 228 ++++++++++++++++++++ 4 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 src/gallium/drivers/radeon/radeon_vce_50.c diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index c655fe5787b..f63790c329e 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -12,6 +12,7 @@ C_SOURCES := \ radeon_uvd.c \ radeon_uvd.h \ radeon_vce_40_2_2.c \ + radeon_vce_50.c \ radeon_vce.c \ radeon_vce.h \ radeon_video.c \ diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 9913c8b4f1a..a6567379fe3 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -44,6 +44,10 @@ #include "radeon_video.h" #include "radeon_vce.h" +#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8)) +#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8)) +#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8)) + /** * flush commands to the hardware */ @@ -444,7 +448,19 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, reset_cpb(enc); - radeon_vce_40_2_2_init(enc); + switch (rscreen->info.vce_fw_version) { + case FW_40_2_2: + radeon_vce_40_2_2_init(enc); + break; + + case FW_50_0_1: + case FW_50_1_2: + radeon_vce_50_init(enc); + break; + + default: + goto error; + } return &enc->base; @@ -464,5 +480,7 @@ error: */ bool rvce_is_fw_version_supported(struct 
r600_common_screen *rscreen) { - return rscreen->info.vce_fw_version == ((40 << 24) | (2 << 16) | (2 << 8)); + return rscreen->info.vce_fw_version == FW_40_2_2 || + rscreen->info.vce_fw_version == FW_50_0_1 || + rscreen->info.vce_fw_version == FW_50_1_2; } diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h index 9fcaecabac4..8319ef48cd5 100644 --- a/src/gallium/drivers/radeon/radeon_vce.h +++ b/src/gallium/drivers/radeon/radeon_vce.h @@ -121,4 +121,7 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen); /* init vce fw 40.2.2 specific callbacks */ void radeon_vce_40_2_2_init(struct rvce_encoder *enc); +/* init vce fw 50 specific callbacks */ +void radeon_vce_50_init(struct rvce_encoder *enc); + #endif diff --git a/src/gallium/drivers/radeon/radeon_vce_50.c b/src/gallium/drivers/radeon/radeon_vce_50.c new file mode 100644 index 00000000000..84a2bfb117e --- /dev/null +++ b/src/gallium/drivers/radeon/radeon_vce_50.c @@ -0,0 +1,228 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König + * + */ + +#include + +#include "pipe/p_video_codec.h" + +#include "util/u_video.h" +#include "util/u_memory.h" + +#include "vl/vl_video_buffer.h" + +#include "r600_pipe_common.h" +#include "radeon_video.h" +#include "radeon_vce.h" + +static void task_info(struct rvce_encoder *enc, uint32_t taskOperation) +{ + RVCE_BEGIN(0x00000002); // task info + RVCE_CS(0xffffffff); // offsetOfNextTaskInfo + RVCE_CS(taskOperation); // taskOperation + RVCE_CS(0x00000000); // referencePictureDependency + RVCE_CS(0x00000000); // collocateFlagDependency + RVCE_CS(0x00000000); // feedbackIndex + RVCE_CS(0x00000000); // videoBitstreamRingIndex + RVCE_END(); +} + +static void rate_control(struct rvce_encoder *enc) +{ + RVCE_BEGIN(0x04000005); // rate control + RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod + RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate + RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate + RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum + RVCE_CS(0x00000000); // encGOPSize + RVCE_CS(enc->pic.quant_i_frames); // encQP_I + RVCE_CS(enc->pic.quant_p_frames); // encQP_P + RVCE_CS(enc->pic.quant_b_frames); // encQP_B + RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize + RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen + 
RVCE_CS(0x00000000); // encVBVBufferLevel + RVCE_CS(0x00000000); // encMaxAUSize + RVCE_CS(0x00000000); // encQPInitialMode + RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture + RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger + RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional + RVCE_CS(0x00000000); // encMinQP + RVCE_CS(0x00000033); // encMaxQP + RVCE_CS(0x00000000); // encSkipFrameEnable + RVCE_CS(0x00000000); // encFillerDataEnable + RVCE_CS(0x00000000); // encEnforceHRD + RVCE_CS(0x00000000); // encBPicsDeltaQP + RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP + RVCE_CS(0x00000000); // encRateControlReInitDisable + RVCE_CS(0x00000000); // encLCVBRInitQPFlag + RVCE_CS(0x00000000); // encLCVBRSATDBasedNonlinearBitBudgetFlag + RVCE_END(); +} + +static void encode(struct rvce_encoder *enc) +{ + int i; + unsigned luma_offset, chroma_offset; + + task_info(enc, 0x00000003); + + RVCE_BEGIN(0x05000001); // context buffer + RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains); // encodeContextAddressHi + RVCE_CS(0x00000000); // encodeContextAddressLo + RVCE_END(); + + RVCE_BEGIN(0x05000004); // video bitstream buffer + RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT); // videoBitstreamRingAddressHi + RVCE_CS(0x00000000); // videoBitstreamRingAddressLo + RVCE_CS(enc->bs_size); // videoBitstreamRingSize + RVCE_END(); + + RVCE_BEGIN(0x03000001); // encode + RVCE_CS(enc->pic.frame_num ? 
0x0 : 0x11); // insertHeaders + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize + RVCE_CS(0x00000000); // forceRefreshMap + RVCE_CS(0x00000000); // insertAUD + RVCE_CS(0x00000000); // endOfSequence + RVCE_CS(0x00000000); // endOfStream + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureLumaAddressHi + RVCE_CS(enc->luma->level[0].offset); // inputPictureLumaAddressLo + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureChromaAddressHi + RVCE_CS(enc->chroma->level[0].offset); // inputPictureChromaAddressLo + RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch + RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch + RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch + RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) + RVCE_CS(0x00000000); // encInputPicTileConfig + RVCE_CS(enc->pic.picture_type); // encPicType + RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag + RVCE_CS(0x00000000); // encIdrPicId + RVCE_CS(0x00000000); // encMGSKeyPic + RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag + RVCE_CS(0x00000000); // encTemporalLayerIndex + RVCE_CS(0x00000000); // num_ref_idx_active_override_flag + RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1 + RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1 + + i = enc->pic.frame_num - enc->pic.ref_idx_l0; + if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { + RVCE_CS(0x00000001); // encRefListModificationOp + RVCE_CS(i - 1); // encRefListModificationNum + } else { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + + for (i = 0; i < 3; ++i) { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + for (i = 0; i < 4; ++i) { + RVCE_CS(0x00000000); // encDecodedPictureMarkingOp + RVCE_CS(0x00000000); // 
encDecodedPictureMarkingNum + RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum + } + + // encReferencePictureL0[0] + RVCE_CS(0x00000000); // pictureStructure + if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l0 = l0_slot(enc); + rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset); + RVCE_CS(l0->picture_type); // encPicType + RVCE_CS(l0->frame_num); // frameNumber + RVCE_CS(l0->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + // encReferencePictureL0[1] + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + + // encReferencePictureL1[0] + RVCE_CS(0x00000000); // pictureStructure + if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l1 = l1_slot(enc); + rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset); + RVCE_CS(l1->picture_type); // encPicType + RVCE_CS(l1->frame_num); // frameNumber + RVCE_CS(l1->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset); + RVCE_CS(luma_offset); // encReconstructedLumaOffset + 
RVCE_CS(chroma_offset); // encReconstructedChromaOffset + RVCE_CS(0x00000000); // encColocBufferOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset + RVCE_CS(0x00000000); // pictureCount + RVCE_CS(enc->pic.frame_num); // frameNumber + RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount + RVCE_CS(0x00000000); // numIPicRemainInRCGOP + RVCE_CS(0x00000000); // numPPicRemainInRCGOP + RVCE_CS(0x00000000); // numBPicRemainInRCGOP + RVCE_CS(0x00000000); // numIRPicRemainInRCGOP + RVCE_CS(0x00000000); // enableIntraRefresh + RVCE_END(); +} + +void radeon_vce_50_init(struct rvce_encoder *enc) +{ + radeon_vce_40_2_2_init(enc); + + /* only the two below are different */ + enc->rate_control = rate_control; + enc->encode = encode; +} From 867fd2b5f586085c137af264e3eb640094e88526 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 21 May 2015 20:06:19 +0200 Subject: [PATCH 225/834] nv50: fix 64-bit queries with HUD, based on nvc0 A sequence number is written for 32-bits queries to make sure they are ready, but not for 64-bits queries. Instead, we have to use a fence in order to fix the HUD because it doesn't wait until the result is ready. 
Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 6690aa282eb..a3c88411199 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -46,6 +46,7 @@ struct nv50_query { boolean flushed; boolean is64bit; struct nouveau_mm_allocation *mm; + struct nouveau_fence *fence; }; #define NV50_QUERY_ALLOC_SPACE 256 @@ -92,6 +93,7 @@ static void nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) { nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); + nouveau_fence_ref(NULL, &nv50_query(pq)->fence); FREE(nv50_query(pq)); } @@ -260,12 +262,22 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) break; } q->ready = q->flushed = FALSE; + + if (q->is64bit) + nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence); } static INLINE boolean nv50_query_ready(struct nv50_query *q) { - return q->ready || (!q->is64bit && (q->data[0] == q->sequence)); + if (q->is64bit) { + if (nouveau_fence_signalled(q->fence)) + return TRUE; + } else { + if (q->data[0] == q->sequence) + return TRUE; + } + return FALSE; } static boolean From a21d23e191696ca130fd63617b8d177055b73dda Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 21 May 2015 20:14:44 +0200 Subject: [PATCH 226/834] nv50: fix PIPELINE_STATISTICS with HUD, based on nvc0 Tested on NVA8. No regression for ARB_pipeline_statistics piglit tests. 
Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index a3c88411199..da412097f38 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -114,7 +114,8 @@ nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || type == PIPE_QUERY_PRIMITIVES_EMITTED || - type == PIPE_QUERY_SO_STATISTICS); + type == PIPE_QUERY_SO_STATISTICS || + type == PIPE_QUERY_PIPELINE_STATISTICS); q->type = type; if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { From 5614bcc416cf2ff1d816d52198e644565ca23bcd Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 18 May 2015 14:59:13 -0700 Subject: [PATCH 227/834] nir: Remove sRGB colorspace conversion round-trip. Some shaders in Civilization V and Beyond Earth do pow(pow(x, 2.2), 0.454545) which is converting to and from sRGB colorspace. A more general rule that replaces pow(pow(a, b), c) with pow(a, b * c) actually regresses two shaders in Sun Temple in which the result of the inner pow is used twice, once by another pow and once by another instruction. Also, since 2.2 * 0.454545 isn't exactly one, the more general pattern would have still left us with a pow, and I'm 2.2 * 0.454545 percent sure that's not what they want. 
instructions in affected programs: 934 -> 886 (-5.14%) helped: 16 --- src/glsl/nir/nir_opt_algebraic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index fa039222fd2..eace791f5b0 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -156,6 +156,8 @@ optimizations = [ (('fpow', a, 2.0), ('fmul', a, a)), (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), (('fpow', 2.0, a), ('fexp2', a)), + (('fpow', ('fpow', a, 2.2), 0.454545), a), + (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)), (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), From a1c070c1a7c6b37a36f591bd8caf4619e4457eae Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 18 May 2015 15:23:28 -0700 Subject: [PATCH 228/834] i965/disasm: Skip swizzle disassembly when using 3-src repctrl. ... since it's always .x, and also always print the subreg offset when using repctrl. 
--- src/mesa/drivers/dri/i965/brw_disasm.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 95e262a361b..ef9e7ccda79 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -977,13 +977,14 @@ src0_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) brw_inst_3src_src0_reg_nr(devinfo, inst)); if (err == -1) return 0; - if (src0_subreg_nr) + if (src0_subreg_nr || brw_inst_3src_src0_rep_ctrl(devinfo, inst)) format(file, ".%d", src0_subreg_nr); if (brw_inst_3src_src0_rep_ctrl(devinfo, inst)) string(file, "<0,1,0>"); - else + else { string(file, "<4,4,1>"); - err |= src_swizzle(file, brw_inst_3src_src0_swizzle(devinfo, inst)); + err |= src_swizzle(file, brw_inst_3src_src0_swizzle(devinfo, inst)); + } err |= control(file, "src da16 reg type", three_source_reg_encoding, brw_inst_3src_src_type(devinfo, inst), NULL); return err; @@ -1003,13 +1004,14 @@ src1_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) brw_inst_3src_src1_reg_nr(devinfo, inst)); if (err == -1) return 0; - if (src1_subreg_nr) + if (src1_subreg_nr || brw_inst_3src_src1_rep_ctrl(devinfo, inst)) format(file, ".%d", src1_subreg_nr); if (brw_inst_3src_src1_rep_ctrl(devinfo, inst)) string(file, "<0,1,0>"); - else + else { string(file, "<4,4,1>"); - err |= src_swizzle(file, brw_inst_3src_src1_swizzle(devinfo, inst)); + err |= src_swizzle(file, brw_inst_3src_src1_swizzle(devinfo, inst)); + } err |= control(file, "src da16 reg type", three_source_reg_encoding, brw_inst_3src_src_type(devinfo, inst), NULL); return err; @@ -1030,13 +1032,14 @@ src2_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) brw_inst_3src_src2_reg_nr(devinfo, inst)); if (err == -1) return 0; - if (src2_subreg_nr) + if (src2_subreg_nr || brw_inst_3src_src2_rep_ctrl(devinfo, inst)) format(file, ".%d", src2_subreg_nr); 
if (brw_inst_3src_src2_rep_ctrl(devinfo, inst)) string(file, "<0,1,0>"); - else + else { string(file, "<4,4,1>"); - err |= src_swizzle(file, brw_inst_3src_src2_swizzle(devinfo, inst)); + err |= src_swizzle(file, brw_inst_3src_src2_swizzle(devinfo, inst)); + } err |= control(file, "src da16 reg type", three_source_reg_encoding, brw_inst_3src_src_type(devinfo, inst), NULL); return err; From b6298c7a7143eafea3c1be6e98af1d0239fdf5b7 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Wed, 19 Nov 2014 13:17:48 -0800 Subject: [PATCH 229/834] glapi: gl_table.py: Fix some low hanging style issues Making the tools shut up about worthless errors so you can see real ones is very useful Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_table.py | 42 +++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/mapi/glapi/gen/gl_table.py b/src/mapi/glapi/gen/gl_table.py index fd384689f57..32456250dc4 100644 --- a/src/mapi/glapi/gen/gl_table.py +++ b/src/mapi/glapi/gen/gl_table.py @@ -2,6 +2,7 @@ # (C) Copyright IBM Corporation 2004 # All Rights Reserved. +# Copyright (c) 2014 Intel Corporation # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -25,9 +26,12 @@ # Authors: # Ian Romanick +import sys +import getopt + import gl_XML import license -import sys, getopt + class PrintGlTable(gl_XML.gl_print_base): def __init__(self, es=False): @@ -39,21 +43,20 @@ class PrintGlTable(gl_XML.gl_print_base): self.license = license.bsd_license_template % ( \ """Copyright (C) 1999-2003 Brian Paul All Rights Reserved. 
(C) Copyright IBM Corporation 2004""", "BRIAN PAUL, IBM") - self.ifdef_emitted = False; + self.ifdef_emitted = False return - def printBody(self, api): for f in api.functionIterateByOffset(): if not f.is_abi() and not self.ifdef_emitted: print '#if !defined HAVE_SHARED_GLAPI' self.ifdef_emitted = True arg_string = f.get_parameter_string() - print ' %s (GLAPIENTRYP %s)(%s); /* %d */' % (f.return_type, f.name, arg_string, f.offset) + print ' %s (GLAPIENTRYP %s)(%s); /* %d */' % ( + f.return_type, f.name, arg_string, f.offset) print '#endif /* !defined HAVE_SHARED_GLAPI */' - def printRealHeader(self): print '#ifndef GLAPIENTRYP' print '# ifndef GLAPIENTRY' @@ -68,7 +71,6 @@ class PrintGlTable(gl_XML.gl_print_base): print '{' return - def printRealFooter(self): print '};' return @@ -81,7 +83,8 @@ class PrintRemapTable(gl_XML.gl_print_base): self.es = es self.header_tag = '_DISPATCH_H_' self.name = "gl_table.py (from Mesa)" - self.license = license.bsd_license_template % ("(C) Copyright IBM Corporation 2005", "IBM") + self.license = license.bsd_license_template % ( + "(C) Copyright IBM Corporation 2005", "IBM") return @@ -100,6 +103,7 @@ class PrintRemapTable(gl_XML.gl_print_base): """ return + def printBody(self, api): print '#define CALL_by_offset(disp, cast, offset, parameters) \\' print ' (*(cast (GET_by_offset(disp, offset)))) parameters' @@ -124,10 +128,10 @@ class PrintRemapTable(gl_XML.gl_print_base): count = 0 for f in api.functionIterateByOffset(): if not f.is_abi(): - functions.append( [f, count] ) + functions.append([f, count]) count += 1 else: - abi_functions.append( [f, -1] ) + abi_functions.append([f, -1]) if self.es: # remember functions with aliases @@ -165,7 +169,7 @@ class PrintRemapTable(gl_XML.gl_print_base): print '' for f, index in abi_functions + functions: - arg_string = gl_XML.create_parameter_string( f.parameters, 0 ) + arg_string = gl_XML.create_parameter_string(f.parameters, 0) print 'typedef %s (GLAPIENTRYP _glptr_%s)(%s);' % 
(f.return_type, f.name, arg_string) print '#define CALL_%s(disp, parameters) \\' % (f.name) @@ -205,17 +209,19 @@ def show_usage(): print " -c ver Version can be 'es1' or 'es2'." sys.exit(1) -if __name__ == '__main__': + +def main(): + """Main function.""" file_name = "gl_API.xml" try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:m:c:") - except Exception,e: + args, _ = getopt.getopt(sys.argv[1:], "f:m:c:") + except Exception: show_usage() mode = "table" es = None - for (arg,val) in args: + for (arg, val) in args: if arg == "-f": file_name = val elif arg == "-m": @@ -230,9 +236,13 @@ if __name__ == '__main__': else: show_usage() - api = gl_XML.parse_GL_API( file_name ) + api = gl_XML.parse_GL_API(file_name) if es is not None: api.filter_functions_by_api(es) - printer.Print( api ) + printer.Print(api) + + +if __name__ == '__main__': + main() From cf718cc964f86dc49c1fc9ed5e39aa5bd87ad931 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Wed, 19 Nov 2014 13:36:35 -0800 Subject: [PATCH 230/834] glapi: gl_table.py: replace getopt with argparse. This results in slightly less code, but code that is much more readable. It has the advantage of putting everything together in one place, all of the code is self documenting, help messages are auto-generated, choices are automatically enforced, and the syntax is much less C like, taking advantage of python features and idioms. 
Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_table.py | 64 ++++++++++++++++------------------ 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/src/mapi/glapi/gen/gl_table.py b/src/mapi/glapi/gen/gl_table.py index 32456250dc4..30903fd8f60 100644 --- a/src/mapi/glapi/gen/gl_table.py +++ b/src/mapi/glapi/gen/gl_table.py @@ -26,8 +26,7 @@ # Authors: # Ian Romanick -import sys -import getopt +import argparse import gl_XML import license @@ -203,45 +202,42 @@ class PrintRemapTable(gl_XML.gl_print_base): return -def show_usage(): - print "Usage: %s [-f input_file_name] [-m mode] [-c ver]" % sys.argv[0] - print " -m mode Mode can be 'table' or 'remap_table'." - print " -c ver Version can be 'es1' or 'es2'." - sys.exit(1) +def _parser(): + """Parse arguments and return a namespace.""" + parser = argparse.ArgumentParser() + parser.add_argument('-f', '--filename', + type=gl_XML.parse_GL_API, + default='gl_API.xml', + metavar="input_file_name", + dest='api', + help="Path to an XML description of OpenGL API.") + parser.add_argument('-m', '--mode', + choices=['table', 'remap_table'], + default='table', + metavar="mode", + help="Generate either a table or a remap_table") + parser.add_argument('-c', '--es-version', + choices=[None, 'es1', 'es2'], + default=None, + metavar="ver", + dest='es', + help="filter functions for es") + return parser.parse_args() def main(): """Main function.""" - file_name = "gl_API.xml" + args = _parser() - try: - args, _ = getopt.getopt(sys.argv[1:], "f:m:c:") - except Exception: - show_usage() + if args.mode == "table": + printer = PrintGlTable(args.es) + elif args.mode == "remap_table": + printer = PrintRemapTable(args.es) - mode = "table" - es = None - for (arg, val) in args: - if arg == "-f": - file_name = val - elif arg == "-m": - mode = val - elif arg == "-c": - es = val + if args.es is not None: + args.api.filter_functions_by_api(args.es) - if mode == "table": - printer = PrintGlTable(es) - elif mode == 
"remap_table": - printer = PrintRemapTable(es) - else: - show_usage() - - api = gl_XML.parse_GL_API(file_name) - - if es is not None: - api.filter_functions_by_api(es) - - printer.Print(api) + printer.Print(args.api) if __name__ == '__main__': From bdae3bc1ffb14b705a0c6fef3e90380dfd0eed97 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Thu, 20 Nov 2014 14:01:40 -0800 Subject: [PATCH 231/834] glapi: remap_helper.py: Fix some low hanging style issues This makes the tools shut up about a bunch of problems, making them more useful for catching actual problems. Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/remap_helper.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/mapi/glapi/gen/remap_helper.py b/src/mapi/glapi/gen/remap_helper.py index e1a13d0b3dd..d34e1b55bae 100644 --- a/src/mapi/glapi/gen/remap_helper.py +++ b/src/mapi/glapi/gen/remap_helper.py @@ -24,9 +24,12 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -import gl_XML +import sys +import getopt + import license -import sys, getopt, string +import gl_XML + def get_function_spec(func): sig = "" @@ -54,6 +57,7 @@ def get_function_spec(func): return spec + class PrintGlRemap(gl_XML.gl_print_base): def __init__(self): gl_XML.gl_print_base.__init__(self) @@ -168,12 +172,13 @@ def show_usage(): print " -c ver Version can be 'es1' or 'es2'." 
sys.exit(1) -if __name__ == '__main__': + +def main(): file_name = "gl_API.xml" try: (args, trail) = getopt.getopt(sys.argv[1:], "f:c:") - except Exception,e: + except Exception: show_usage() es = None @@ -190,3 +195,7 @@ if __name__ == '__main__': printer = PrintGlRemap() printer.Print( api ) + + +if __name__ == '__main__': + main() From 622fee43c8aa339e6b642fc8a90c759dcf28c6e7 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Thu, 20 Nov 2014 14:07:15 -0800 Subject: [PATCH 232/834] glapi: remap_helper.py: use argparse instead of optparse Make the code simpler, cleaner, and easier to work with. Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/remap_helper.py | 46 ++++++++++++++---------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/mapi/glapi/gen/remap_helper.py b/src/mapi/glapi/gen/remap_helper.py index d34e1b55bae..9e3c3908d8c 100644 --- a/src/mapi/glapi/gen/remap_helper.py +++ b/src/mapi/glapi/gen/remap_helper.py @@ -24,8 +24,7 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -import sys -import getopt +import argparse import license import gl_XML @@ -167,34 +166,33 @@ class PrintGlRemap(gl_XML.gl_print_base): return -def show_usage(): - print "Usage: %s [-f input_file_name] [-c ver]" % sys.argv[0] - print " -c ver Version can be 'es1' or 'es2'." 
- sys.exit(1) +def _parser(): + """Parse input options and return a namespace.""" + parser = argparse.ArgumentParser() + parser.add_argument('-f', '--filename', + type=gl_XML.parse_GL_API, + default="gl_API.xml", + metavar="input_file_name", + dest='api', + help="An xml description file.") + parser.add_argument('-c', '--es-version', + choices=[None, 'es1', 'es2'], + default=None, + metavar='ver', + dest='es', + help='A GLES version to support') + return parser.parse_args() def main(): - file_name = "gl_API.xml" + """Main function.""" + args = _parser() - try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:c:") - except Exception: - show_usage() - - es = None - for (arg,val) in args: - if arg == "-f": - file_name = val - elif arg == "-c": - es = val - - api = gl_XML.parse_GL_API( file_name ) - - if es is not None: - api.filter_functions_by_api(es) + if args.es is not None: + args.api.filter_functions_by_api(args.es) printer = PrintGlRemap() - printer.Print( api ) + printer.Print(args.api) if __name__ == '__main__': From 28ecdd6be7e6f58eabfc9aa0461fb8db7dd8133d Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Thu, 20 Nov 2014 17:07:48 -0800 Subject: [PATCH 233/834] glapi: gl_procs.py: Fix a few low hanging style things Shuts up analysis tools to make them return actual problems.
Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_procs.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/mapi/glapi/gen/gl_procs.py b/src/mapi/glapi/gen/gl_procs.py index b1fffc4ca07..d5ffb9286ef 100644 --- a/src/mapi/glapi/gen/gl_procs.py +++ b/src/mapi/glapi/gen/gl_procs.py @@ -25,9 +25,13 @@ # Authors: # Ian Romanick +import sys +import getopt + import license -import gl_XML, glX_XML -import sys, getopt +import gl_XML +import glX_XML + class PrintGlProcs(gl_XML.gl_print_base): def __init__(self, es=False): @@ -39,7 +43,6 @@ class PrintGlProcs(gl_XML.gl_print_base): """Copyright (C) 1999-2001 Brian Paul All Rights Reserved. (C) Copyright IBM Corporation 2004, 2006""", "BRIAN PAUL, IBM") - def printRealHeader(self): print """ /* This file is only included by glapi.c and is used for @@ -166,16 +169,18 @@ def show_usage(): print "-c Enable compatibility with OpenGL ES." sys.exit(1) -if __name__ == '__main__': + +def main(): + """Main function.""" file_name = "gl_API.xml" try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:c") - except Exception,e: + args, _ = getopt.getopt(sys.argv[1:], "f:c") + except Exception: show_usage() es = False - for (arg,val) in args: + for arg, val in args: if arg == "-f": file_name = val elif arg == "-c": @@ -184,3 +189,7 @@ if __name__ == '__main__': api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory()) printer = PrintGlProcs(es) printer.Print(api) + + +if __name__ == '__main__': + main() From e51530ba1665af7120be852653bbff930fa1ca33 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Mon, 24 Nov 2014 14:14:12 -0800 Subject: [PATCH 234/834] glapi: gl_procs.py: Use argparse rather than getopt Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_procs.py | 42 +++++++++++++++------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/src/mapi/glapi/gen/gl_procs.py b/src/mapi/glapi/gen/gl_procs.py index 
d5ffb9286ef..cf6d2de8a4b 100644 --- a/src/mapi/glapi/gen/gl_procs.py +++ b/src/mapi/glapi/gen/gl_procs.py @@ -25,8 +25,7 @@ # Authors: # Ian Romanick -import sys -import getopt +import argparse import license import gl_XML @@ -164,31 +163,28 @@ typedef struct { return -def show_usage(): - print "Usage: %s [-f input_file_name] [-c]" % sys.argv[0] - print "-c Enable compatibility with OpenGL ES." - sys.exit(1) +def _parser(): + """Parse arguments and return a namespace.""" + api_type = lambda x: gl_XML.parse_GL_API(x, glX_XML.glx_item_factory()) + + parser = argparse.ArgumentParser() + parser.add_argument('-f', '--filename', + type=api_type, + default='gl_API.xml', + metavar="input_file_name", + dest='api', + help="Path to an XML description of OpenGL API.") + parser.add_argument('-c', '--es-version', + dest='es', + action="store_true", + help="filter functions for es") + return parser.parse_args() def main(): """Main function.""" - file_name = "gl_API.xml" - - try: - args, _ = getopt.getopt(sys.argv[1:], "f:c") - except Exception: - show_usage() - - es = False - for arg, val in args: - if arg == "-f": - file_name = val - elif arg == "-c": - es = True - - api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory()) - printer = PrintGlProcs(es) - printer.Print(api) + args = _parser() + PrintGlProcs(args.es).Print(args.api) if __name__ == '__main__': From fd5f1dd6c72e34a8f0522c4706cd5eec624e80cf Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Mon, 9 Feb 2015 14:18:30 -0800 Subject: [PATCH 235/834] glapi: gl_enums.py: use argparse instead of getopt.
Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_enums.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/mapi/glapi/gen/gl_enums.py b/src/mapi/glapi/gen/gl_enums.py index f45782df85f..927a68b3e05 100644 --- a/src/mapi/glapi/gen/gl_enums.py +++ b/src/mapi/glapi/gen/gl_enums.py @@ -1,8 +1,8 @@ #!/usr/bin/python2 # -*- Mode: Python; py-indent-offset: 8 -*- -# (C) Copyright Zack Rusin 2005 -# All Rights Reserved. +# (C) Copyright Zack Rusin 2005. All Rights Reserved. +# Copyright (C) 2015 Intel Corporation # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -26,6 +26,8 @@ # Authors: # Zack Rusin +import argparse + import license import gl_XML import sys, getopt @@ -201,21 +203,17 @@ _mesa_lookup_prim_by_nr(GLuint nr) enum.append( [name, priority] ) -def show_usage(): - print "Usage: %s [-f input_file_name]" % sys.argv[0] - sys.exit(1) +def _parser(): + parser = argparse.ArgumentParser() + parser.add_argument('-f', '--input_file', + required=True, + help="Choose an xml file to parse.") + return parser.parse_args() + if __name__ == '__main__': - try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:") - except Exception,e: - show_usage() - - api_list = [] - for (arg,val) in args: - if arg == "-f": - api = gl_XML.parse_GL_API( val ) - api_list.append(api); + args = _parser() + api_list = [gl_XML.parse_GL_API(args.input_file)] printer = PrintGlEnums() - printer.Print( api_list ) + printer.Print(api_list) From 6c4dcef6dc704156115b8d3ad19c3020663c7ffc Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Mon, 9 Feb 2015 14:19:23 -0800 Subject: [PATCH 236/834] glapi: gl_enums.py: use main() function for if __name__ == "__main__" Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_enums.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/src/mapi/glapi/gen/gl_enums.py b/src/mapi/glapi/gen/gl_enums.py index 927a68b3e05..955f27d0818 100644 --- a/src/mapi/glapi/gen/gl_enums.py +++ b/src/mapi/glapi/gen/gl_enums.py @@ -211,9 +211,13 @@ def _parser(): return parser.parse_args() -if __name__ == '__main__': +def main(): args = _parser() api_list = [gl_XML.parse_GL_API(args.input_file)] printer = PrintGlEnums() printer.Print(api_list) + + +if __name__ == '__main__': + main() From 24ec03bd05153bf0c8b1063d4e6a68b073e57840 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 10 Feb 2015 10:55:45 -0800 Subject: [PATCH 237/834] glapi: gl_apitemp.py: Convert to argparse instead of getopt Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_apitemp.py | 39 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/mapi/glapi/gen/gl_apitemp.py b/src/mapi/glapi/gen/gl_apitemp.py index 4157032c5f0..7647428b331 100644 --- a/src/mapi/glapi/gen/gl_apitemp.py +++ b/src/mapi/glapi/gen/gl_apitemp.py @@ -25,9 +25,10 @@ # Authors: # Ian Romanick +import argparse + import gl_XML, glX_XML import license -import sys, getopt class PrintGlOffsets(gl_XML.gl_print_base): def __init__(self, es=False): @@ -301,27 +302,25 @@ _glapi_proc UNUSED_TABLE_NAME[] = {""" return -def show_usage(): - print "Usage: %s [-f input_file_name] [-c]" % sys.argv[0] - print "-c Enable compatibility with OpenGL ES." 
- sys.exit(1) +def _parser(): + """Parse arguments and return a namespace.""" + parser = argparse.ArgumentParser() + parser.add_argument('-f', + metavar='', + dest='filename', + default="gl_API.xml", + help="An XML file describing the API.") + parser.add_argument('-c', + action='store_true', + dest='es', + help="Enable OpenGL ES compatibility") + return parser.parse_args() + if __name__ == '__main__': - file_name = "gl_API.xml" + args = _parser() - try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:c") - except Exception,e: - show_usage() + api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) - es = False - for (arg,val) in args: - if arg == "-f": - file_name = val - elif arg == "-c": - es = True - - api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory()) - - printer = PrintGlOffsets(es) + printer = PrintGlOffsets(args.es) printer.Print(api) From 3317cea0488075f291744ebc4eaa48fc73d293de Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 10 Feb 2015 10:57:08 -0800 Subject: [PATCH 238/834] glapi: gl_apitemp.py: Use a main function Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_apitemp.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mapi/glapi/gen/gl_apitemp.py b/src/mapi/glapi/gen/gl_apitemp.py index 7647428b331..5e985a2ecac 100644 --- a/src/mapi/glapi/gen/gl_apitemp.py +++ b/src/mapi/glapi/gen/gl_apitemp.py @@ -317,10 +317,15 @@ def _parser(): return parser.parse_args() -if __name__ == '__main__': +def main(): + """Main function.""" args = _parser() api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) printer = PrintGlOffsets(args.es) printer.Print(api) + + +if __name__ == '__main__': + main() From d36fa4472ea408eb2a1ecadc44268bce4bab92ea Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 10 Feb 2015 14:32:40 -0800 Subject: [PATCH 239/834] glapi: gl_gentable.py: Replace getopt with argparse Signed-off-by: Dylan Baker Acked-by: Matt Turner ---
src/mapi/glapi/gen/gl_gentable.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/mapi/glapi/gen/gl_gentable.py b/src/mapi/glapi/gen/gl_gentable.py index 06a5ebf049f..5ef5a75fc17 100644 --- a/src/mapi/glapi/gen/gl_gentable.py +++ b/src/mapi/glapi/gen/gl_gentable.py @@ -2,6 +2,7 @@ # (C) Copyright IBM Corporation 2004, 2005 # (C) Copyright Apple Inc. 2011 +# Copyright (C) 2015 Intel Corporation # All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a @@ -29,9 +30,10 @@ # Based on code ogiginally by: # Ian Romanick +import argparse + import license import gl_XML, glX_XML -import sys, getopt header = """/* GLXEXT is the define used in the xserver when the GLX extension is being * built. Hijack this to determine whether this file is being built for the @@ -186,23 +188,22 @@ class PrintCode(gl_XML.gl_print_base): print body_template % vars return -def show_usage(): - print "Usage: %s [-f input_file_name]" % sys.argv[0] - sys.exit(1) + +def _parser(): + """Parse arguments and return a namespace object.""" + parser = argparse.ArgumentParser() + parser.add_argument('-f', + dest='filename', + default='gl_API.xml', + help='An XML file description of an API') + + return parser.parse_args() + if __name__ == '__main__': - file_name = "gl_API.xml" - - try: - (args, trail) = getopt.getopt(sys.argv[1:], "m:f:") - except Exception,e: - show_usage() - - for (arg,val) in args: - if arg == "-f": - file_name = val + args = _parser() printer = PrintCode() - api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory()) + api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) printer.Print(api) From 5998d32f09777b9bbcd422dfbab9261f1b0e07b8 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 10 Feb 2015 14:33:12 -0800 Subject: [PATCH 240/834] glapi: gl_gentable.py: use a main function Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_gentable.py | 7 
++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mapi/glapi/gen/gl_gentable.py b/src/mapi/glapi/gen/gl_gentable.py index 5ef5a75fc17..1b3eb72470d 100644 --- a/src/mapi/glapi/gen/gl_gentable.py +++ b/src/mapi/glapi/gen/gl_gentable.py @@ -200,10 +200,15 @@ def _parser(): return parser.parse_args() -if __name__ == '__main__': +def main(): + """Main function.""" args = _parser() printer = PrintCode() api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) printer.Print(api) + + +if __name__ == '__main__': + main() From fc96122fb6450dd9b8c90dc5efb6bb0ab235fe0e Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Wed, 11 Feb 2015 18:04:22 -0800 Subject: [PATCH 241/834] glapi: gl_x86_asm.py: use argparse instead of getopt Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_x86_asm.py | 37 +++++++++++--------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/src/mapi/glapi/gen/gl_x86_asm.py b/src/mapi/glapi/gen/gl_x86_asm.py index f855dbaaa41..86d45f26046 100644 --- a/src/mapi/glapi/gen/gl_x86_asm.py +++ b/src/mapi/glapi/gen/gl_x86_asm.py @@ -25,9 +25,10 @@ # Authors: # Ian Romanick +import argparse + import license import gl_XML, glX_XML -import sys, getopt class PrintGenericStubs(gl_XML.gl_print_base): @@ -217,30 +218,18 @@ class PrintGenericStubs(gl_XML.gl_print_base): return -def show_usage(): - print "Usage: %s [-f input_file_name] [-m output_mode]" % sys.argv[0] - sys.exit(1) +def _parser(): + parser = argparse.ArgumentParser() + parser.add_argument('-f', + dest='filename', + default='gl_API.xml', + help='An XML file describing an API.') + return parser.parse_args() + if __name__ == '__main__': - file_name = "gl_API.xml" - mode = "generic" + args = _parser() + printer = PrintGenericStubs() - try: - (args, trail) = getopt.getopt(sys.argv[1:], "m:f:") - except Exception,e: - show_usage() - - for (arg,val) in args: - if arg == '-m': - mode = val - elif arg == "-f": - file_name = val - - if 
mode == "generic": - printer = PrintGenericStubs() - else: - print "ERROR: Invalid mode \"%s\" specified." % mode - show_usage() - - api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory()) + api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) printer.Print(api) From 48924567994c43e734f97a4b9150e87fa72b6c11 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Wed, 11 Feb 2015 18:05:35 -0800 Subject: [PATCH 242/834] glapi: gl_x86_asm.py: use a main function Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_x86_asm.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mapi/glapi/gen/gl_x86_asm.py b/src/mapi/glapi/gen/gl_x86_asm.py index 86d45f26046..c0c7941ce10 100644 --- a/src/mapi/glapi/gen/gl_x86_asm.py +++ b/src/mapi/glapi/gen/gl_x86_asm.py @@ -227,9 +227,13 @@ def _parser(): return parser.parse_args() -if __name__ == '__main__': +def main(): args = _parser() printer = PrintGenericStubs() api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) printer.Print(api) + + +if __name__ == '__main__': + main() From 2e3da443f16e479997cdc5a2a137b9823f8617df Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Thu, 12 Feb 2015 14:04:03 -0800 Subject: [PATCH 243/834] glapi: gl_x86_64_asm.py: Use argparse instead of getopt Also removes the redundant -m argument, which could only be set to 'generic', or it would raise an exception. This option wasn't used in the makefile. 
Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_x86-64_asm.py | 40 ++++++++++++----------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/src/mapi/glapi/gen/gl_x86-64_asm.py b/src/mapi/glapi/gen/gl_x86-64_asm.py index 7afc2b108f9..018896403d5 100644 --- a/src/mapi/glapi/gen/gl_x86-64_asm.py +++ b/src/mapi/glapi/gen/gl_x86-64_asm.py @@ -25,9 +25,11 @@ # Authors: # Ian Romanick +import argparse +import copy + import license import gl_XML, glX_XML -import sys, getopt, copy def should_use_push(registers): for [reg, offset] in registers: @@ -289,30 +291,20 @@ class PrintGenericStubs(gl_XML.gl_print_base): return -def show_usage(): - print "Usage: %s [-f input_file_name] [-m output_mode]" % sys.argv[0] - sys.exit(1) + +def _parser(): + """Parse arguments and return a namespace.""" + parser = argparse.ArgumentParser() + parser.add_argument('-f', + default='gl_API.xml', + dest='filename', + help='An XML file describing an API') + return parser.parse_args() + if __name__ == '__main__': - file_name = "gl_API.xml" - mode = "generic" + args = _parser() + printer = PrintGenericStubs() + api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) - try: - (args, trail) = getopt.getopt(sys.argv[1:], "m:f:") - except Exception,e: - show_usage() - - for (arg,val) in args: - if arg == '-m': - mode = val - elif arg == "-f": - file_name = val - - if mode == "generic": - printer = PrintGenericStubs() - else: - print "ERROR: Invalid mode \"%s\" specified." 
% mode - show_usage() - - api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory()) printer.Print(api) From f2e78bd697e168c4f8cb1fd7f939713f8319eb78 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Thu, 12 Feb 2015 14:05:46 -0800 Subject: [PATCH 244/834] glapi: gl_x86-64_asm.py: Use a main function Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_x86-64_asm.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mapi/glapi/gen/gl_x86-64_asm.py b/src/mapi/glapi/gen/gl_x86-64_asm.py index 018896403d5..cf42371f8c3 100644 --- a/src/mapi/glapi/gen/gl_x86-64_asm.py +++ b/src/mapi/glapi/gen/gl_x86-64_asm.py @@ -302,9 +302,14 @@ def _parser(): return parser.parse_args() -if __name__ == '__main__': +def main(): + """Main file.""" args = _parser() printer = PrintGenericStubs() api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) printer.Print(api) + + +if __name__ == '__main__': + main() From 86c9fb526ed9b0a68eb7bb29d661b7f61415d3f0 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Fri, 13 Feb 2015 15:49:16 -0800 Subject: [PATCH 245/834] glapi: gl_SPARC_asm.py use argparse instead of getopt Also drop -m switch, which only accepted a single value or raised an error, and was unused in the makefile. 
Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_SPARC_asm.py | 38 +++++++++++------------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/src/mapi/glapi/gen/gl_SPARC_asm.py b/src/mapi/glapi/gen/gl_SPARC_asm.py index 23e23295bec..df7a039f51e 100644 --- a/src/mapi/glapi/gen/gl_SPARC_asm.py +++ b/src/mapi/glapi/gen/gl_SPARC_asm.py @@ -25,9 +25,10 @@ # Authors: # Ian Romanick +import argparse + import license import gl_XML, glX_XML -import sys, getopt class PrintGenericStubs(gl_XML.gl_print_base): def __init__(self): @@ -244,30 +245,19 @@ class PrintGenericStubs(gl_XML.gl_print_base): return -def show_usage(): - print "Usage: %s [-f input_file_name] [-m output_mode]" % sys.argv[0] - sys.exit(1) +def _parser(): + """Parse arguments and return a namespace.""" + parser = argparse.ArgumentParser() + parser.add_argument('-f', + dest='filename', + default='gl_API.xml', + help='An XML description of an API.') + return parser.parse_args() + if __name__ == '__main__': - file_name = "gl_API.xml" - mode = "generic" + args = _parser() + printer = PrintGenericStubs() - try: - (args, trail) = getopt.getopt(sys.argv[1:], "m:f:") - except Exception,e: - show_usage() - - for (arg,val) in args: - if arg == '-m': - mode = val - elif arg == "-f": - file_name = val - - if mode == "generic": - printer = PrintGenericStubs() - else: - print "ERROR: Invalid mode \"%s\" specified." 
% mode - show_usage() - - api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory()) + api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) printer.Print(api) From 952bd305c6862113c60d3b62402fc5a32dbb65ca Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Fri, 13 Feb 2015 15:54:17 -0800 Subject: [PATCH 246/834] glapi: gl_SPARC_asm.py: use main function Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_SPARC_asm.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mapi/glapi/gen/gl_SPARC_asm.py b/src/mapi/glapi/gen/gl_SPARC_asm.py index df7a039f51e..fa6217e1b8f 100644 --- a/src/mapi/glapi/gen/gl_SPARC_asm.py +++ b/src/mapi/glapi/gen/gl_SPARC_asm.py @@ -255,9 +255,14 @@ def _parser(): return parser.parse_args() -if __name__ == '__main__': +def main(): + """Main function.""" args = _parser() printer = PrintGenericStubs() api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) printer.Print(api) + + +if __name__ == '__main__': + main() From dddac8cac3ab883b6beeb4af9ca27bb2f3b4ebec Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Fri, 13 Feb 2015 16:41:03 -0800 Subject: [PATCH 247/834] glapi: glX_server_table.py: use argparse instead of getopt Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/glX_server_table.py | 40 +++++++++++--------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/src/mapi/glapi/gen/glX_server_table.py b/src/mapi/glapi/gen/glX_server_table.py index 47aa11116be..2d21f4e4ee1 100644 --- a/src/mapi/glapi/gen/glX_server_table.py +++ b/src/mapi/glapi/gen/glX_server_table.py @@ -25,8 +25,9 @@ # Authors: # Ian Romanick +import argparse + import gl_XML, glX_XML, glX_proto_common, license -import sys, getopt def log2(value): @@ -383,28 +384,19 @@ class PrintGlxDispatchTables(glX_proto_common.glx_print_proto): return +def _parser(): + """Parse arguments and return namespace.""" + parser = argparse.ArgumentParser() + 
parser.add_argument('-f', + dest='filename', + default='gl_API.xml', + help='An XML file describing an API.') + return parser.parse_args() + + if __name__ == '__main__': - file_name = "gl_API.xml" + args = _parser() + printer = PrintGlxDispatchTables() + api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) - try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:m") - except Exception,e: - show_usage() - - mode = "table_c" - for (arg,val) in args: - if arg == "-f": - file_name = val - elif arg == "-m": - mode = val - - if mode == "table_c": - printer = PrintGlxDispatchTables() - else: - show_usage() - - - api = gl_XML.parse_GL_API( file_name, glX_XML.glx_item_factory() ) - - - printer.Print( api ) + printer.Print(api) From 9eed4e6232b9ca936ad8e87aa21f97ffb81981ce Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 10 Feb 2015 15:20:18 -0800 Subject: [PATCH 248/834] glapi: glX_proto_send.py: use argparse instead of getopt Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/glX_proto_send.py | 57 +++++++++++++--------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/src/mapi/glapi/gen/glX_proto_send.py b/src/mapi/glapi/gen/glX_proto_send.py index b93989f753a..ea766afe80a 100644 --- a/src/mapi/glapi/gen/glX_proto_send.py +++ b/src/mapi/glapi/gen/glX_proto_send.py @@ -2,6 +2,7 @@ # (C) Copyright IBM Corporation 2004, 2005 # All Rights Reserved. 
+# Copyright (c) 2015 Intel Corporation # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -26,8 +27,10 @@ # Ian Romanick # Jeremy Kolb +import argparse + import gl_XML, glX_XML, glX_proto_common, license -import sys, getopt, copy, string +import copy, string def convertStringForXCB(str): tmp = "" @@ -1085,42 +1088,36 @@ extern _X_HIDDEN NOINLINE FASTCALL GLubyte * __glXSetupVendorRequest( print '#endif' -def show_usage(): - print "Usage: %s [-f input_file_name] [-m output_mode] [-d]" % sys.argv[0] - print " -m output_mode Output mode can be one of 'proto', 'init_c' or 'init_h'." - print " -d Enable extra debug information in the generated code." - sys.exit(1) +def _parser(): + """Parse input and returned a parsed namespace.""" + parser = argparse.ArgumentParser() + parser.add_argument('-f', + default='gl_API.xml', + dest='filename', + help='An XML file describing an API') + parser.add_argument('-m', + required=True, + dest='mode', + choices=frozenset(['proto', 'init_c', 'init_h']), + help='which file to generate') + parser.add_argument('-d', + action='store_true', + dest='debug', + help='turn debug mode on.') + return parser.parse_args() if __name__ == '__main__': - file_name = "gl_API.xml" + args = _parser() - try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:m:d") - except Exception,e: - show_usage() - - debug = 0 - mode = "proto" - for (arg,val) in args: - if arg == "-f": - file_name = val - elif arg == "-m": - mode = val - elif arg == "-d": - debug = 1 - - if mode == "proto": + if args.mode == "proto": printer = PrintGlxProtoStubs() - elif mode == "init_c": + elif args.mode == "init_c": printer = PrintGlxProtoInit_c() - elif mode == "init_h": + elif args.mode == "init_h": printer = PrintGlxProtoInit_h() - else: - show_usage() - - printer.debug = debug - api = gl_XML.parse_GL_API( file_name, glX_XML.glx_item_factory() ) + printer.debug = args.debug + api 
= gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) printer.Print( api ) From 9097a4a103f2f7abf5af3e1056467c21051405ca Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 10 Feb 2015 15:20:57 -0800 Subject: [PATCH 249/834] glapi: glX_proto_send.py: use a main function. Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/glX_proto_send.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mapi/glapi/gen/glX_proto_send.py b/src/mapi/glapi/gen/glX_proto_send.py index ea766afe80a..2b3303078a2 100644 --- a/src/mapi/glapi/gen/glX_proto_send.py +++ b/src/mapi/glapi/gen/glX_proto_send.py @@ -1107,7 +1107,8 @@ def _parser(): return parser.parse_args() -if __name__ == '__main__': +def main(): + """Main function.""" args = _parser() if args.mode == "proto": @@ -1121,3 +1122,7 @@ if __name__ == '__main__': api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) printer.Print( api ) + + +if __name__ == '__main__': + main() From 79c4e595bce563d6075fed176c2256bf2e7e99a5 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Wed, 20 May 2015 11:49:10 -0700 Subject: [PATCH 250/834] glapi: gl_genexec.py: use argparse instead of getopt Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_genexec.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/src/mapi/glapi/gen/gl_genexec.py b/src/mapi/glapi/gen/gl_genexec.py index 4e76fe3c2cd..dbaafa71fd7 100644 --- a/src/mapi/glapi/gen/gl_genexec.py +++ b/src/mapi/glapi/gen/gl_genexec.py @@ -25,10 +25,11 @@ # _mesa_initialize_exec_table(). It is responsible for populating all # entries in the "exec" dispatch table that aren't dynamic. 
+import argparse import collections import license import gl_XML -import sys, getopt +import sys exec_flavor_map = { @@ -207,24 +208,18 @@ class PrintCode(gl_XML.gl_print_base): print ' }' -def show_usage(): - print "Usage: %s [-f input_file_name]" % sys.argv[0] - sys.exit(1) +def _parser(): + """Parse arguments and return namespace.""" + parser = argparse.ArgumentParser() + parser.add_argument('-f', + dest='filename', + default='gl_and_es_API.xml', + help='an xml file describing an API') + return parser.parse_args() if __name__ == '__main__': - file_name = "gl_and_es_API.xml" - - try: - (args, trail) = getopt.getopt(sys.argv[1:], "m:f:") - except Exception,e: - show_usage() - - for (arg,val) in args: - if arg == "-f": - file_name = val - + args = _parser() printer = PrintCode() - - api = gl_XML.parse_GL_API(file_name) + api = gl_XML.parse_GL_API(args.filename) printer.Print(api) From 67d3ec0bb8f6a8c918b371ed03ef21814899f07d Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Wed, 20 May 2015 11:49:40 -0700 Subject: [PATCH 251/834] glapy: gl_genexec.py: use a main function Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/gl_genexec.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mapi/glapi/gen/gl_genexec.py b/src/mapi/glapi/gen/gl_genexec.py index dbaafa71fd7..0d58a8a2914 100644 --- a/src/mapi/glapi/gen/gl_genexec.py +++ b/src/mapi/glapi/gen/gl_genexec.py @@ -218,8 +218,13 @@ def _parser(): return parser.parse_args() -if __name__ == '__main__': +def main(): + """Main function.""" args = _parser() printer = PrintCode() api = gl_XML.parse_GL_API(args.filename) printer.Print(api) + + +if __name__ == '__main__': + main() From d986cb7c70db3b512f6ee0bbc95ba2565606c222 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Wed, 20 May 2015 15:19:28 -0700 Subject: [PATCH 252/834] glapi: glX_proto_recv.py: use argparse instead of getopt Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/glX_proto_recv.py | 53 
+++++++++++++++------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/src/mapi/glapi/gen/glX_proto_recv.py b/src/mapi/glapi/gen/glX_proto_recv.py index d076409c2fa..dbc26a8eb03 100644 --- a/src/mapi/glapi/gen/glX_proto_recv.py +++ b/src/mapi/glapi/gen/glX_proto_recv.py @@ -25,8 +25,10 @@ # Authors: # Ian Romanick +import argparse +import string + import gl_XML, glX_XML, glX_proto_common, license -import sys, getopt, string class PrintGlxDispatch_h(gl_XML.gl_print_base): @@ -524,31 +526,34 @@ class PrintGlxDispatchFunctions(glX_proto_common.glx_print_proto): return +def _parser(): + """Parse any arguments passed and return a namespace.""" + parser = argparse.ArgumentParser() + parser.add_argument('-f', + dest='filename', + default='gl_API.xml', + help='an xml file describing an OpenGL API') + parser.add_argument('-m', + dest='mode', + default='dispatch_c', + choices=['dispatch_c', 'dispatch_h'], + help='what file to generate') + parser.add_argument('-s', + dest='swap', + action='store_true', + help='emit swap in GlXDispatchFunctions') + return parser.parse_args() + + if __name__ == '__main__': - file_name = "gl_API.xml" + args = _parser() - try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:m:s") - except Exception,e: - show_usage() - - mode = "dispatch_c" - do_swap = 0 - for (arg,val) in args: - if arg == "-f": - file_name = val - elif arg == "-m": - mode = val - elif arg == "-s": - do_swap = 1 - - if mode == "dispatch_c": - printer = PrintGlxDispatchFunctions(do_swap) - elif mode == "dispatch_h": + if args._mode == "dispatch_c": + printer = PrintGlxDispatchFunctions(args.swap) + elif args._mode == "dispatch_h": printer = PrintGlxDispatch_h() - else: - show_usage() - api = gl_XML.parse_GL_API( file_name, glX_proto_common.glx_proto_item_factory() ) + api = gl_XML.parse_GL_API( + args.filename, glX_proto_common.glx_proto_item_factory()) - printer.Print( api ) + printer.Print(api) From 1c7cc67778073fd802773390da55980702637547 Mon Sep 17 
00:00:00 2001 From: Dylan Baker Date: Wed, 20 May 2015 15:20:09 -0700 Subject: [PATCH 253/834] glapi: glX_proto_recv.py: Use a main function Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/glX_proto_recv.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mapi/glapi/gen/glX_proto_recv.py b/src/mapi/glapi/gen/glX_proto_recv.py index dbc26a8eb03..da468dc5876 100644 --- a/src/mapi/glapi/gen/glX_proto_recv.py +++ b/src/mapi/glapi/gen/glX_proto_recv.py @@ -545,7 +545,8 @@ def _parser(): return parser.parse_args() -if __name__ == '__main__': +def main(): + """Main function.""" args = _parser() if args._mode == "dispatch_c": @@ -557,3 +558,7 @@ if __name__ == '__main__': args.filename, glX_proto_common.glx_proto_item_factory()) printer.Print(api) + + +if __name__ == '__main__': + main() From 9ace0b542241c77ae82a0835ac8a09e2a7510eaf Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Wed, 20 May 2015 15:49:11 -0700 Subject: [PATCH 254/834] glapi: glX_proto_size.py: use argparse instead of getopt This is roughly equivalent to the original getopt, except that it removes the '-h' short option, which argparse reserves for auto-generated help messages. It does retain the long option specified by the getopt version, and changes the makefile to use that. 
Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/Makefile.am | 2 +- src/mapi/glapi/gen/glX_proto_size.py | 88 ++++++++++++++-------------- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am index adebd5c65f6..d7742f7b9d4 100644 --- a/src/mapi/glapi/gen/Makefile.am +++ b/src/mapi/glapi/gen/Makefile.am @@ -290,7 +290,7 @@ $(MESA_GLX_DIR)/indirect_init.c: glX_proto_send.py $(COMMON_GLX) $(MESA_GLX_DIR)/indirect_size.h $(XORG_GLX_DIR)/indirect_size.h: glX_proto_size.py $(COMMON_GLX) $(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_h --only-set \ - -h _INDIRECT_SIZE_H_ \ + --header-tag _INDIRECT_SIZE_H_ \ | $(INDENT) $(INDENT_FLAGS) > $@ $(MESA_GLX_DIR)/indirect_size.c: glX_proto_size.py $(COMMON_GLX) diff --git a/src/mapi/glapi/gen/glX_proto_size.py b/src/mapi/glapi/gen/glX_proto_size.py index 4737fbf717d..59f65d49ce0 100644 --- a/src/mapi/glapi/gen/glX_proto_size.py +++ b/src/mapi/glapi/gen/glX_proto_size.py @@ -25,9 +25,11 @@ # Authors: # Ian Romanick +import argparse +import sys, string + import gl_XML, glX_XML import license -import sys, getopt, copy, string class glx_enum_function(object): @@ -650,54 +652,52 @@ class PrintGlxReqSize_c(PrintGlxReqSize_common): return alias -def show_usage(): - print "Usage: %s [-f input_file_name] -m output_mode [--only-get | --only-set] [--get-alias-set]" % sys.argv[0] - print " -m output_mode Output mode can be one of 'size_c' or 'size_h'." - print " --only-get Only emit 'get'-type functions." - print " --only-set Only emit 'set'-type functions." - print "" - print "By default, both 'get' and 'set'-type functions are emitted." 
- sys.exit(1) +def _parser(): + """Parse arguments and return a namespace.""" + parser = argparse.ArgumentParser() + parser.set_defaults(which_functions=(PrintGlxSizeStubs_common.do_get | + PrintGlxSizeStubs_common.do_set)) + parser.add_argument('-f', + dest='filename', + default='gl_API.xml', + help='an XML file describing an OpenGL API.') + parser.add_argument('-m', + dest='mode', + choices=['size_c', 'size_h', 'reqsize_c', 'reqsize_h'], + help='Which file to generate') + getset = parser.add_mutually_exclusive_group() + getset.add_argument('--only-get', + dest='which_functions', + action='store_const', + const=PrintGlxSizeStubs_common.do_get, + help='only emit "get-type" functions') + getset.add_argument('--only-set', + dest='which_functions', + action='store_const', + const=PrintGlxSizeStubs_common.do_set, + help='only emit "set-type" functions') + parser.add_argument('--header-tag', + dest='header_tag', + action='store', + default=None, + help='set header tag value') + return parser.parse_args() if __name__ == '__main__': - file_name = "gl_API.xml" + args = _parser() - try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:m:h:", ["only-get", "only-set", "header-tag"]) - except Exception,e: - show_usage() - - mode = None - header_tag = None - which_functions = PrintGlxSizeStubs_common.do_get | PrintGlxSizeStubs_common.do_set - - for (arg,val) in args: - if arg == "-f": - file_name = val - elif arg == "-m": - mode = val - elif arg == "--only-get": - which_functions = PrintGlxSizeStubs_common.do_get - elif arg == "--only-set": - which_functions = PrintGlxSizeStubs_common.do_set - elif (arg == '-h') or (arg == "--header-tag"): - header_tag = val - - if mode == "size_c": - printer = PrintGlxSizeStubs_c( which_functions ) - elif mode == "size_h": - printer = PrintGlxSizeStubs_h( which_functions ) - if header_tag: - printer.header_tag = header_tag - elif mode == "reqsize_c": + if args.mode == "size_c": + printer = PrintGlxSizeStubs_c(args.which_functions) + elif 
args.mode == "size_h": + printer = PrintGlxSizeStubs_h(args.which_functions) + if args.header_tag is not None: + printer.header_tag = args.header_tag + elif args.mode == "reqsize_c": printer = PrintGlxReqSize_c() - elif mode == "reqsize_h": + elif args.mode == "reqsize_h": printer = PrintGlxReqSize_h() - else: - show_usage() - api = gl_XML.parse_GL_API( file_name, glX_XML.glx_item_factory() ) + api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) - - printer.Print( api ) + printer.Print(api) From 3f823cc55a15bc0b54d09e2c3fb5944a645b09e4 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Wed, 20 May 2015 15:51:40 -0700 Subject: [PATCH 255/834] glapi: glX_proto_size.py: use a main function Signed-off-by: Dylan Baker Acked-by: Matt Turner --- src/mapi/glapi/gen/glX_proto_size.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mapi/glapi/gen/glX_proto_size.py b/src/mapi/glapi/gen/glX_proto_size.py index 59f65d49ce0..75fc26f5db0 100644 --- a/src/mapi/glapi/gen/glX_proto_size.py +++ b/src/mapi/glapi/gen/glX_proto_size.py @@ -684,7 +684,8 @@ def _parser(): return parser.parse_args() -if __name__ == '__main__': +def main(): + """Main function.""" args = _parser() if args.mode == "size_c": @@ -701,3 +702,7 @@ if __name__ == '__main__': api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory()) printer.Print(api) + + +if __name__ == '__main__': + main() From 491adb61d25eef8afe2615e0fd842dda20b17004 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 May 2015 13:18:54 -0700 Subject: [PATCH 256/834] glx: fix Scons build Replace -h with --header-tag as was done for the Makefile build. 
Reviewed-by: Dylan Baker --- src/glx/SConscript | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glx/SConscript b/src/glx/SConscript index b91c0bdd758..619e4c373ed 100644 --- a/src/glx/SConscript +++ b/src/glx/SConscript @@ -125,7 +125,7 @@ env.CodeGenerate( target = 'indirect_size.h', script = GLAPI + 'gen/glX_proto_size.py', source = sources, - command = python_cmd + ' $SCRIPT -f $SOURCE -m size_h --only-set -h _INDIRECT_SIZE_H > $TARGET' + command = python_cmd + ' $SCRIPT -f $SOURCE -m size_h --only-set --header-tag _INDIRECT_SIZE_H > $TARGET' ) env.CodeGenerate( From 0ec6b8ea8ce0929ecacf6edc8db198b7b9604f18 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 3 May 2015 18:38:52 -0400 Subject: [PATCH 257/834] nvc0/ir: avoid jumping to a sched instruction Signed-off-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 2 ++ src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 7 +++++-- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 6bb9620d5f7..28081fa8cd8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1316,6 +1316,8 @@ CodeEmitterGK110::emitFlow(const Instruction *i) } else if (mask & 2) { int32_t pcRel = f->target.bb->binPos - (codeSize + 8); + if (writeIssueDelays && !(f->target.bb->binPos & 0x3f)) + pcRel += 8; // currently we don't want absolute branches assert(!f->absolute); code[0] |= (pcRel & 0x1ff) << 23; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 22db368b371..442cedfaa5c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ 
-509,10 +509,13 @@ CodeEmitterGM107::emitBRA() emitCond5(0x00, CC_TR); if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + int32_t pos = insn->target.bb->binPos; + if (writeIssueDelays && !(pos & 0x1f)) + pos += 8; if (!insn->absolute) - emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); + emitField(0x14, 24, pos - (codeSize + 8)); else - emitField(0x14, 32, insn->target.bb->binPos); + emitField(0x14, 32, pos); } else { emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0)); emitField(0x05, 1, 1); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index be6fe9574df..b352d0a9690 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1406,6 +1406,8 @@ CodeEmitterNVC0::emitFlow(const Instruction *i) } else if (mask & 2) { int32_t pcRel = f->target.bb->binPos - (codeSize + 8); + if (writeIssueDelays && !(f->target.bb->binPos & 0x3f)) + pcRel += 8; // currently we don't want absolute branches assert(!f->absolute); code[0] |= (pcRel & 0x3f) << 26; From e5ad19a46e87ed22943d7f6ad046f974fd5977e1 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 8 May 2015 23:00:05 -0400 Subject: [PATCH 258/834] nvc0/ir: allow iset to produce a boolean float Signed-off-by: Ilia Mirkin --- .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 12 ++++++++---- .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 1 + .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 8 +++++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 28081fa8cd8..ab8bf2e5504 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -967,8 +967,8 @@ CodeEmitterGK110::emitSET(const CmpInstruction *i) code[0] = 
(code[0] & ~0xfc) | ((code[0] << 3) & 0xe0); if (i->defExists(1)) defId(i->def(1), 2); - else - code[0] |= 0x1c; + else + code[0] |= 0x1c; } else { switch (i->sType) { case TYPE_F32: op2 = 0x000; op1 = 0x800; break; @@ -990,8 +990,12 @@ CodeEmitterGK110::emitSET(const CmpInstruction *i) } FTZ_(3a); - if (i->dType == TYPE_F32) - code[1] |= 1 << 23; + if (i->dType == TYPE_F32) { + if (isFloatType(i->sType)) + code[1] |= 1 << 23; + else + code[1] |= 1 << 15; + } } if (i->sType == TYPE_S32) code[1] |= 1 << 19; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 442cedfaa5c..399a6f1db13 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -1830,6 +1830,7 @@ CodeEmitterGM107::emitISET() emitCond3(0x31, insn->setCond); emitField(0x30, 1, isSignedType(insn->sType)); emitCC (0x2f); + emitField(0x2c, 1, insn->dType == TYPE_F32); emitX (0x2b); emitGPR (0x08, insn->src(0)); emitGPR (0x00, insn->def(0)); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index b352d0a9690..472e3a84119 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1078,8 +1078,14 @@ CodeEmitterNVC0::emitSET(const CmpInstruction *i) if (!isFloatType(i->sType)) lo = 0x3; - if (isFloatType(i->dType) || isSignedIntType(i->sType)) + if (isSignedIntType(i->sType)) lo |= 0x20; + if (isFloatType(i->dType)) { + if (isFloatType(i->sType)) + lo |= 0x20; + else + lo |= 0x80; + } switch (i->op) { case OP_SET_AND: hi = 0x10000000; break; From d2a474e8d4b03f10aec57c7f7740addad1e1ea9d Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 3 May 2015 22:15:16 -0400 Subject: [PATCH 259/834] nvc0/ir: optimize set & 1.0 to produce boolean-float sets This has started to happen more now that 
the backend is producing KILL_IF more often. Signed-off-by: Ilia Mirkin Reviewed-by: Tobias Klausmann --- .../nouveau/codegen/nv50_ir_peephole.cpp | 27 +++++++++++++++++++ .../nouveau/codegen/nv50_ir_target_nv50.cpp | 2 ++ 2 files changed, 29 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 14446b6b53f..82e81482b51 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -973,6 +973,33 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) } break; + case OP_AND: + { + CmpInstruction *cmp = i->getSrc(t)->getInsn()->asCmp(); + if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1) + return; + if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32)) + return; + if (imm0.reg.data.f32 != 1.0) + return; + if (i->getSrc(t)->getInsn()->dType != TYPE_U32) + return; + + i->getSrc(t)->getInsn()->dType = TYPE_F32; + if (i->src(t).mod != Modifier(0)) { + assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT)); + i->src(t).mod = Modifier(0); + cmp->setCond = inverseCondCode(cmp->setCond); + } + i->op = OP_MOV; + i->setSrc(s, NULL); + if (t) { + i->setSrc(0, i->getSrc(t)); + i->setSrc(t, NULL); + } + } + break; + case OP_SHL: { if (s != 1 || i->src(0).mod != Modifier(0)) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index a742162ad3c..ca545a6024a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -416,6 +416,8 @@ TargetNV50::isOpSupported(operation op, DataType ty) const return false; case OP_SAD: return ty == TYPE_S32; + case OP_SET: + return !isFloatType(ty); default: return true; } From a85aba190dfab02ffccf744bad5ad10357394de0 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 8 May 2015 23:46:53 
-0400 Subject: [PATCH 260/834] nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg This covers the pattern where a KILL_IF is used, which triggers a comparison of -x to 0. This can usually be folded into the comparison whose result is being compared to 0, however it may, itself, have already been combined with another comparison. That shouldn't impact the logic of this pass however. With this and the & 1.0 change, code like 00000020: 001c0001 80081df4 set b32 $r0 lt f32 $r0 0x3e800000 00000028: 001c0000 201fc000 and b32 $r0 $r0 0x3f800000 00000030: 7f9c001e dd885c00 set $p0 0x1 lt f32 neg $r0 0x0 00000038: 0000003c 19800000 $p0 discard becomes 00000020: 001c001d b5881df4 set $p0 0x1 lt f32 $r0 0x3e800000 00000028: 0000003c 19800000 $p0 discard Signed-off-by: Ilia Mirkin --- .../nouveau/codegen/nv50_ir_peephole.cpp | 81 +++++++++++++------ 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 82e81482b51..72dd31efecc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -278,7 +278,6 @@ private: void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&); - // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET CmpInstruction *findOriginForTestWithZero(Value *); unsigned int foldCount; @@ -337,25 +336,33 @@ ConstantFolding::findOriginForTestWithZero(Value *value) return NULL; Instruction *insn = value->getInsn(); - while (insn && insn->op != OP_SET) { - Instruction *next = NULL; - switch (insn->op) { - case OP_NEG: - case OP_ABS: - case OP_CVT: - next = insn->getSrc(0)->getInsn(); - if (insn->sType != next->dType) + if (insn->asCmp() && insn->op != OP_SLCT) + return insn->asCmp(); + + /* Sometimes mov's will sneak in as a result of other folding. This gets + * cleaned up later. 
+ */ + if (insn->op == OP_MOV) + return findOriginForTestWithZero(insn->getSrc(0)); + + /* Deal with AND 1.0 here since nv50 can't fold into boolean float */ + if (insn->op == OP_AND) { + int s = 0; + ImmediateValue imm; + if (!insn->src(s).getImmediate(imm)) { + s = 1; + if (!insn->src(s).getImmediate(imm)) return NULL; - break; - case OP_MOV: - next = insn->getSrc(0)->getInsn(); - break; - default: - return NULL; } - insn = next; + if (imm.reg.data.f32 != 1.0f) + return NULL; + /* TODO: Come up with a way to handle the condition being inverted */ + if (insn->src(!s).mod != Modifier(0)) + return NULL; + return findOriginForTestWithZero(insn->getSrc(!s)); } - return insn ? insn->asCmp() : NULL; + + return NULL; } void @@ -946,29 +953,51 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case OP_SET: // TODO: SET_AND,OR,XOR { + /* This optimizes the case where the output of a set is being compared + * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we + * can be a lot cleverer in our comparison. + */ CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t)); CondCode cc, ccZ; - if (i->src(t).mod != Modifier(0)) - return; - if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET) + if (imm0.reg.data.u32 != 0 || !si) return; cc = si->setCond; ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U); + // We do everything assuming var (cmp) 0, reverse the condition if 0 is + // first. if (s == 0) ccZ = reverseCondCode(ccZ); + // If there is a negative modifier, we need to undo that, by flipping + // the comparison to zero. + if (i->src(t).mod.neg()) + ccZ = reverseCondCode(ccZ); + // If this is a signed comparison, we expect the input to be a regular + // boolean, i.e. 0/-1. However the rest of the logic assumes that true + // is positive, so just flip the sign. 
+ if (i->sType == TYPE_S32) { + assert(!isFloatType(si->dType)); + ccZ = reverseCondCode(ccZ); + } switch (ccZ) { - case CC_LT: cc = CC_FL; break; - case CC_GE: cc = CC_TR; break; - case CC_EQ: cc = inverseCondCode(cc); break; - case CC_LE: cc = inverseCondCode(cc); break; - case CC_GT: break; - case CC_NE: break; + case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true + case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true + case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool + case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool + case CC_GT: break; // bool > 0 -- bool + case CC_NE: break; // bool != 0 -- bool default: return; } + + // Update the condition of this SET to be identical to the origin set, + // but with the updated condition code. The original SET should get + // DCE'd, ideally. + i->op = si->op; i->asCmp()->setCond = cc; i->setSrc(0, si->src(0)); i->setSrc(1, si->src(1)); + if (si->srcExists(2)) + i->setSrc(2, si->src(2)); i->sType = si->sType; } break; From d1eea18a595a468dbc2267a8d14197a3b1a5a4b6 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 22 May 2015 16:40:08 -0400 Subject: [PATCH 261/834] nvc0/ir: set ftz when sources are floats, not just destinations In the case of a compare, the destination might be a predicate, but we still want to flush denorms. 
Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index b61f3c49bb9..6d45e22121e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -100,8 +100,7 @@ void NVC0LegalizeSSA::handleFTZ(Instruction *i) { // Only want to flush float inputs - if (i->sType != TYPE_F32) - return; + assert(i->sType == TYPE_F32); // If we're already flushing denorms (and NaN's) to zero, no need for this. if (i->dnz) @@ -129,7 +128,7 @@ NVC0LegalizeSSA::visit(BasicBlock *bb) Instruction *next; for (Instruction *i = bb->getEntry(); i; i = next) { next = i->next; - if (i->dType == TYPE_F32) { + if (i->sType == TYPE_F32) { if (prog->getType() != Program::TYPE_COMPUTE) handleFTZ(i); continue; From 0bab3962f5f313ea829c95920c02f32afb23715d Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 22 May 2015 19:02:41 -0400 Subject: [PATCH 262/834] nv50/ir: guess that the constant offset is the starting slot of array When we get something like IN[ADDR[0].x+5], we will now guess that we should look at IN[5] for the "base" information. 
Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 2dcadeed44d..ecd115f9807 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1356,18 +1356,20 @@ Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c) { const int swz = src.getSwizzle(c); + /* TODO: Use Array ID when it's available for the index */ return makeSym(src.getFile(), src.is2D() ? src.getIndex(1) : 0, - src.isIndirect(0) ? -1 : src.getIndex(0), swz, + src.getIndex(0), swz, src.getIndex(0) * 16 + swz * 4); } Symbol * Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c) { + /* TODO: Use Array ID when it's available for the index */ return makeSym(dst.getFile(), dst.is2D() ? dst.getIndex(1) : 0, - dst.isIndirect(0) ? -1 : dst.getIndex(0), c, + dst.getIndex(0), c, dst.getIndex(0) * 16 + c * 4); } From 217301843aea0299ab245e260b20af7ad250e9d8 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 22 May 2015 19:03:58 -0400 Subject: [PATCH 263/834] nvc0/ir: LOAD's can't be used for shader inputs We forgot to convert to VFETCH in case of indirect access. Fix that. This avoids crashes on the new gs-input-array-vec4-index-rd and vs-output-array-vec4-index-wr-before-gs but they still fail. 
Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp index 64989ac8846..596ac95d489 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -240,6 +240,7 @@ GM107LoweringPass::visit(Instruction *i) Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), i->getIndirect(0, 0), bld.mkImm(4)); i->setIndirect(0, 0, ptr); + i->op = OP_VFETCH; } else { i->op = OP_VFETCH; assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 6d45e22121e..411e2de1b11 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1750,6 +1750,7 @@ NVC0LoweringPass::visit(Instruction *i) Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), i->getIndirect(0, 0), bld.mkImm(4)); i->setIndirect(0, 0, ptr); + i->op = OP_VFETCH; } else { i->op = OP_VFETCH; assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP From c783fd476c61fae41bddead4e47740e23d0cf2eb Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Sat, 23 May 2015 18:50:25 +0200 Subject: [PATCH 264/834] nv50: fix PIPE_QUERY_TIMESTAMP_DISJOINT, based on nvc0 PIPE_QUERY_TIMESTAMP_DISJOINT could not work because q->ready was always set to FALSE. To fix this issue, add more different states for queries according to nvc0. 
Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 39 +++++++++++-------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index da412097f38..55fcac86bd4 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -27,6 +27,11 @@ #include "nv50/nv50_context.h" #include "nv_object.xml.h" +#define NV50_QUERY_STATE_READY 0 +#define NV50_QUERY_STATE_ACTIVE 1 +#define NV50_QUERY_STATE_ENDED 2 +#define NV50_QUERY_STATE_FLUSHED 3 + /* XXX: Nested queries, and simultaneous queries on multiple gallium contexts * (since we use only a single GPU channel per screen) will not work properly. * @@ -42,8 +47,7 @@ struct nv50_query { struct nouveau_bo *bo; uint32_t base; uint32_t offset; /* base + i * 32 */ - boolean ready; - boolean flushed; + uint8_t state; boolean is64bit; struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; @@ -66,7 +70,7 @@ nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) if (q->bo) { nouveau_bo_ref(NULL, &q->bo); if (q->mm) { - if (q->ready) + if (q->state == NV50_QUERY_STATE_READY) nouveau_mm_free(q->mm); else nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, @@ -203,7 +207,7 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) default: break; } - q->ready = FALSE; + q->state = NV50_QUERY_STATE_ACTIVE; return true; } @@ -214,6 +218,8 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); + q->state = NV50_QUERY_STATE_ENDED; + switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: nv50_query_get(push, q, 0, 0x0100f002); @@ -256,29 +262,27 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) break; case PIPE_QUERY_TIMESTAMP_DISJOINT: /* This query is not 
issued on GPU because disjoint is forced to FALSE */ - q->ready = TRUE; + q->state = NV50_QUERY_STATE_READY; break; default: assert(0); break; } - q->ready = q->flushed = FALSE; if (q->is64bit) nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence); } -static INLINE boolean -nv50_query_ready(struct nv50_query *q) +static INLINE void +nv50_query_update(struct nv50_query *q) { if (q->is64bit) { if (nouveau_fence_signalled(q->fence)) - return TRUE; + q->state = NV50_QUERY_STATE_READY; } else { if (q->data[0] == q->sequence) - return TRUE; + q->state = NV50_QUERY_STATE_READY; } - return FALSE; } static boolean @@ -293,13 +297,14 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, uint64_t *data64 = (uint64_t *)q->data; int i; - if (!q->ready) /* update ? */ - q->ready = nv50_query_ready(q); - if (!q->ready) { + if (q->state != NV50_QUERY_STATE_READY) + nv50_query_update(q); + + if (q->state != NV50_QUERY_STATE_READY) { if (!wait) { /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */ - if (!q->flushed) { - q->flushed = TRUE; + if (q->state != NV50_QUERY_STATE_FLUSHED) { + q->state = NV50_QUERY_STATE_FLUSHED; PUSH_KICK(nv50->base.pushbuf); } return FALSE; @@ -307,7 +312,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) return FALSE; } - q->ready = TRUE; + q->state = NV50_QUERY_STATE_READY; switch (q->type) { case PIPE_QUERY_GPU_FINISHED: From 58aed1031d40e62c9f41f7c512b3165dd5913d1e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 May 2015 16:25:02 -0700 Subject: [PATCH 265/834] prog_to_nir: Use a variable for uniform data Previously, the prog_to_nir pass was directly generating uniform load/store intrinsics. This converts it to use a single giant "parameters" variable and we now depend on lowering to get the uniform load/store intrinsics. 
One advantage of this is that we now have one code-path after we do the initial conversion into NIR. No shader-db changes. Signed-off-by: Jason Ekstrand Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_nir.c | 15 ++----- src/mesa/program/prog_to_nir.c | 65 ++++++++++++++++------------- 2 files changed, 38 insertions(+), 42 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index de4d7aafd44..142162c1f8a 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -122,18 +122,9 @@ brw_create_nir(struct brw_context *brw, /* Get rid of split copies */ nir_optimize(nir); - if (shader_prog) { - nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, - &nir->num_direct_uniforms, - &nir->num_uniforms); - } else { - /* ARB programs generally create a giant array of "uniform" data, and allow - * indirect addressing without any boundaries. In the absence of bounds - * analysis, it's all or nothing. num_direct_uniforms is only useful when - * we have some direct and some indirect access; it doesn't matter here. 
- */ - nir->num_direct_uniforms = 0; - } + nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, + &nir->num_direct_uniforms, + &nir->num_uniforms); nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs); nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs); diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 6c5fa51ec61..3067b2516bf 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -47,6 +47,7 @@ struct ptn_compile { nir_builder build; bool error; + nir_variable *parameters; nir_variable *input_vars[VARYING_SLOT_MAX]; nir_variable *output_vars[VARYING_SLOT_MAX]; nir_register **output_regs; @@ -112,21 +113,6 @@ ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst) return dest; } -/** - * Multiply the contents of the ADDR register by 4 to convert from the number - * of vec4s to the number of floating point components. - */ -static nir_ssa_def * -ptn_addr_reg_value(struct ptn_compile *c) -{ - nir_builder *b = &c->build; - nir_alu_src src; - memset(&src, 0, sizeof(src)); - src.src = nir_src_for_reg(c->addr_reg); - - return nir_imul(b, nir_fmov_alu(b, src, 1), nir_imm_int(b, 4)); -} - static nir_ssa_def * ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) { @@ -180,27 +166,40 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) } /* FALLTHROUGH */ case PROGRAM_STATE_VAR: { - nir_intrinsic_op load_op = - prog_src->RelAddr ? nir_intrinsic_load_uniform_indirect : - nir_intrinsic_load_uniform; - nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, load_op); + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); load->num_components = 4; - /* Multiply src->Index by 4 to scale from # of vec4s to components. 
*/ - load->const_index[0] = 4 * prog_src->Index; - load->const_index[1] = 1; + load->variables[0] = nir_deref_var_create(load, c->parameters); + nir_deref_array *deref_arr = + nir_deref_array_create(load->variables[0]); + deref_arr->deref.type = glsl_vec4_type(); + load->variables[0]->deref.child = &deref_arr->deref; if (prog_src->RelAddr) { - nir_ssa_def *reladdr = ptn_addr_reg_value(c); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + + nir_alu_src addr_src = { NIR_SRC_INIT }; + addr_src.src = nir_src_for_reg(c->addr_reg); + nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1); + if (prog_src->Index < 0) { /* This is a negative offset which should be added to the address * register's value. */ - reladdr = nir_iadd(b, reladdr, nir_imm_int(b, load->const_index[0])); - load->const_index[0] = 0; + nir_alu_src addr_src = { NIR_SRC_INIT }; + addr_src.src = nir_src_for_reg(c->addr_reg); + reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index)); + + deref_arr->base_offset = 0; + } else { + deref_arr->base_offset = prog_src->Index; } - load->src[0] = nir_src_for_ssa(reladdr); + deref_arr->indirect = nir_src_for_ssa(reladdr); + } else { + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = prog_src->Index; } nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); @@ -1057,13 +1056,11 @@ setup_registers_and_variables(struct ptn_compile *c) } reg->num_components = 1; c->addr_reg = reg; - - /* Set the number of uniforms */ - shader->num_uniforms = 4 * c->prog->Parameters->NumParameters; } struct nir_shader * -prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *options) +prog_to_nir(const struct gl_program *prog, + const nir_shader_compiler_options *options) { struct ptn_compile *c; struct nir_shader *s; @@ -1076,6 +1073,14 @@ prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *op goto fail; c->prog = prog; + c->parameters = rzalloc(s, nir_variable); + 
c->parameters->type = glsl_array_type(glsl_vec4_type(), + prog->Parameters->NumParameters); + c->parameters->name = "parameters"; + c->parameters->data.read_only = true; + c->parameters->data.mode = nir_var_uniform; + exec_list_push_tail(&s->uniforms, &c->parameters->node); + nir_function *func = nir_function_create(s, "main"); nir_function_overload *overload = nir_function_overload_create(func); nir_function_impl *impl = nir_function_impl_create(overload); From 6ca67f62e885f0e42c0cef2db5c0ae837adfe646 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 May 2015 17:35:29 -0700 Subject: [PATCH 266/834] i965/fs: Fix implied_mrf_writes for scratch writes We build the entire message in the generator so all the MRF writes are implied. Cc: "10.5 10.6" Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 9b3186bd3f8..42a0d78e542 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1050,7 +1050,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: return inst->mlen; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: - return 2; + return inst->mlen; case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: From 921917c8d8e707dd854e7be05fba7a3e55bc71bf Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 23 May 2015 17:35:42 -0400 Subject: [PATCH 267/834] nvc0: a geometry shader can have up to 1024 vertices output The 1024 is already reported everywhere, not sure where this 0x1ff came from. 
Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 55896955ca2..4a47cb2d164 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -392,7 +392,7 @@ nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info) break; } - gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff; + gp->hdr[4] = MIN2(info->prop.gp.maxVertices, 1024); return nvc0_vtgp_gen_header(gp, info); } From c922758685932e86d935972980df3be22d7b2fdf Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 23 May 2015 19:07:48 -0400 Subject: [PATCH 268/834] nv30: check nouveau_bo_map output of notify bo Signed-off-by: Ilia Mirkin --- src/gallium/drivers/nouveau/nv30/nv30_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 025cad28042..bb79ccc538f 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -524,7 +524,7 @@ nv30_screen_create(struct nouveau_device *dev) ret = nouveau_bo_wrap(screen->base.device, fifo->notify, &screen->notify); if (ret == 0) - nouveau_bo_map(screen->notify, 0, screen->base.client); + ret = nouveau_bo_map(screen->notify, 0, screen->base.client); if (ret) FAIL_SCREEN_INIT("error mapping notifier memory: %d\n", ret); From 5c495e86388b55af067677e8608eb124a5d70d29 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Tue, 12 May 2015 02:22:12 +0200 Subject: [PATCH 269/834] clover: implement CL_MEM_ALLOC_HOST_PTR This flag is typically used to request pinned host memory, to avoid any copies between GPU and CPU. This improves throughput with an older OpenCL app which I unfortunately can't publish due to its licensing. 
Reviewed-by: Francisco Jerez --- src/gallium/state_trackers/clover/core/resource.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/state_trackers/clover/core/resource.cpp b/src/gallium/state_trackers/clover/core/resource.cpp index bcf87e15480..8ed4c4284e1 100644 --- a/src/gallium/state_trackers/clover/core/resource.cpp +++ b/src/gallium/state_trackers/clover/core/resource.cpp @@ -137,6 +137,10 @@ root_resource::root_resource(clover::device &dev, memory_obj &obj, PIPE_BIND_TRANSFER_READ | PIPE_BIND_TRANSFER_WRITE); + if (obj.flags() & CL_MEM_ALLOC_HOST_PTR) { + info.usage = PIPE_USAGE_STAGING; + } + pipe = dev.pipe->resource_create(dev.pipe, &info); if (!pipe) throw error(CL_OUT_OF_RESOURCES); From f972b223c4cb4ec58a9451cbac5d120ac9deb336 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Tue, 19 May 2015 09:28:30 +0200 Subject: [PATCH 270/834] clover: try userptr for CL_MEM_USE_HOST_PTR According to spec, CL_MEM_USE_HOST_PTR should directly use host memory, if possible. This is just what userptr is for, so use it. In case the memory cannot be mapped, a fallback similar to CL_MEM_COPY_HOST_PTR is used. 
v2: constify, drop unneeded cast Reviewed-by: Francisco Jerez --- .../state_trackers/clover/core/memory.cpp | 2 +- .../state_trackers/clover/core/resource.cpp | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/memory.cpp b/src/gallium/state_trackers/clover/core/memory.cpp index 905ebc0fd02..055336a3325 100644 --- a/src/gallium/state_trackers/clover/core/memory.cpp +++ b/src/gallium/state_trackers/clover/core/memory.cpp @@ -30,7 +30,7 @@ memory_obj::memory_obj(clover::context &ctx, cl_mem_flags flags, size_t size, void *host_ptr) : context(ctx), _flags(flags), _size(size), _host_ptr(host_ptr) { - if (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) + if (flags & CL_MEM_COPY_HOST_PTR) data.append((char *)host_ptr, size); } diff --git a/src/gallium/state_trackers/clover/core/resource.cpp b/src/gallium/state_trackers/clover/core/resource.cpp index 8ed4c4284e1..78ebafb644f 100644 --- a/src/gallium/state_trackers/clover/core/resource.cpp +++ b/src/gallium/state_trackers/clover/core/resource.cpp @@ -118,6 +118,8 @@ root_resource::root_resource(clover::device &dev, memory_obj &obj, command_queue &q, const std::string &data) : resource(dev, obj) { pipe_resource info {}; + const bool user_ptr_support = dev.pipe->get_param(dev.pipe, + PIPE_CAP_RESOURCE_FROM_USER_MEMORY); if (image *img = dynamic_cast(&obj)) { info.format = translate_format(img->format()); @@ -137,7 +139,15 @@ root_resource::root_resource(clover::device &dev, memory_obj &obj, PIPE_BIND_TRANSFER_READ | PIPE_BIND_TRANSFER_WRITE); - if (obj.flags() & CL_MEM_ALLOC_HOST_PTR) { + if (obj.flags() & CL_MEM_USE_HOST_PTR && user_ptr_support) { + // Page alignment is normally required for this, just try, hope for the + // best and fall back if it fails. 
+ pipe = dev.pipe->resource_from_user_memory(dev.pipe, &info, obj.host_ptr()); + if (pipe) + return; + } + + if (obj.flags() & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR)) { info.usage = PIPE_USAGE_STAGING; } @@ -145,12 +155,13 @@ root_resource::root_resource(clover::device &dev, memory_obj &obj, if (!pipe) throw error(CL_OUT_OF_RESOURCES); - if (!data.empty()) { + if (obj.flags() & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { + const void *data_ptr = !data.empty() ? data.data() : obj.host_ptr(); box rect { {{ 0, 0, 0 }}, {{ info.width0, info.height0, info.depth0 }} }; unsigned cpp = util_format_get_blocksize(info.format); q.pipe->transfer_inline_write(q.pipe, pipe, 0, PIPE_TRANSFER_WRITE, - rect, data.data(), cpp * info.width0, + rect, data_ptr, cpp * info.width0, cpp * info.width0 * info.height0); } } From fa7f9f123b70f313d3c073b52c9c16b4b8df28f8 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 23 May 2015 01:57:41 -0400 Subject: [PATCH 271/834] nv50/ir: avoid messing up arg1 of PFETCH There can be scenarios where the "indirect" arg of a PFETCH becomes known, and so the code will attempt to propagate it. Use this opportunity to just fold it into the first argument, and prevent the load propagation pass from touching PFETCH further. This fixes gs-input-array-vec4-index-rd.shader_test and vs-output-array-vec4-index-wr-before-gs.shader_test on nvc0 at least. 
Signed-off-by: Ilia Mirkin Reviewed-by: Tobias Klausmann Cc: "10.5 10.6" --- .../nouveau/codegen/nv50_ir_peephole.cpp | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 72dd31efecc..b7fcd56724d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -236,6 +236,9 @@ LoadPropagation::visit(BasicBlock *bb) if (i->op == OP_CALL) // calls have args as sources, they must be in regs continue; + if (i->op == OP_PFETCH) // pfetch expects arg1 to be a reg + continue; + if (i->srcExists(1)) checkSwapSrc01(i); @@ -581,6 +584,11 @@ ConstantFolding::expr(Instruction *i, case OP_POPCNT: res.data.u32 = util_bitcount(a->data.u32 & b->data.u32); break; + case OP_PFETCH: + // The two arguments to pfetch are logically added together. Normally + // the second argument will not be constant, but that can happen. + res.data.u32 = a->data.u32 + b->data.u32; + break; default: return; } @@ -595,7 +603,9 @@ ConstantFolding::expr(Instruction *i, i->getSrc(0)->reg.data = res.data; - if (i->op == OP_MAD || i->op == OP_FMA) { + switch (i->op) { + case OP_MAD: + case OP_FMA: { i->op = OP_ADD; i->setSrc(1, i->getSrc(0)); @@ -610,8 +620,14 @@ ConstantFolding::expr(Instruction *i, bld.setPosition(i, false); i->setSrc(1, bld.loadImm(NULL, res.data.u32)); } - } else { + break; + } + case OP_PFETCH: + // Leave PFETCH alone... we just folded its 2 args into 1. + break; + default: i->op = i->saturate ? 
OP_SAT : OP_MOV; /* SAT handled by unary() */ + break; } i->subOp = 0; } From 605ce36d7f4a90c4062d6940bea82ab483bbe3b2 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 24 May 2015 01:31:11 -0400 Subject: [PATCH 272/834] nv30: don't leak fragprog consts Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nv30/nv30_fragprog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c index a05bfe10ee9..ee669b1c1fc 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c @@ -149,6 +149,7 @@ nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) FREE((void *)fp->pipe.tokens); FREE(fp->insn); + FREE(fp->consts); FREE(fp); } From 9870ed05dd333a20662479b9b1e3a8db542924c4 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 24 May 2015 02:23:16 -0400 Subject: [PATCH 273/834] nv30: avoid leaking render state and draw shaders Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nv30/nv30_draw.c | 7 +++++++ src/gallium/drivers/nouveau/nv30/nv30_fragprog.c | 4 ++++ src/gallium/drivers/nouveau/nv30/nv30_vertprog.c | 5 +++++ 3 files changed, 16 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index 3575c3d29fa..13aad7a4e09 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -103,6 +103,7 @@ nv30_render_unmap_vertices(struct vbuf_render *render, { struct nv30_render *r = nv30_render(render); pipe_buffer_unmap(&r->nv30->base.pipe, r->transfer); + r->transfer = NULL; } static void @@ -444,6 +445,12 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) static void nv30_render_destroy(struct vbuf_render *render) { + struct nv30_render *r = nv30_render(render); + + if (r->transfer) + pipe_buffer_unmap(&r->nv30->base.pipe, 
r->transfer); + pipe_resource_reference(&r->buffer, NULL); + nouveau_heap_free(&r->vertprog); FREE(render); } diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c index ee669b1c1fc..7f227868f73 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c @@ -23,6 +23,7 @@ * */ +#include "draw/draw_context.h" #include "tgsi/tgsi_parse.h" #include "nv_object.xml.h" @@ -147,6 +148,9 @@ nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) pipe_resource_reference(&fp->buffer, NULL); + if (fp->draw) + draw_delete_fragment_shader(nv30_context(pipe)->draw, fp->draw); + FREE((void *)fp->pipe.tokens); FREE(fp->insn); FREE(fp->consts); diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c index 3c1b7e714ea..4d4145d10b5 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c @@ -23,6 +23,7 @@ * */ +#include "draw/draw_context.h" #include "util/u_dynarray.h" #include "tgsi/tgsi_parse.h" @@ -237,6 +238,10 @@ nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) if (vp->translated) nv30_vertprog_destroy(vp); + + if (vp->draw) + draw_delete_vertex_shader(nv30_context(pipe)->draw, vp->draw); + FREE((void *)vp->pipe.tokens); FREE(vp); } From 3ab4556b84a944278dbddc21fba40b328a77c2e9 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sat, 23 May 2015 09:02:41 +0100 Subject: [PATCH 274/834] Add release notes for the 10.5.6 release Signed-off-by: Emil Velikov (cherry picked from commit b1cf9cfb1618f0b73e673745d3c8612aea61723d) --- docs/relnotes/10.5.6.html | 146 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 docs/relnotes/10.5.6.html diff --git a/docs/relnotes/10.5.6.html b/docs/relnotes/10.5.6.html new file mode 100644 index 00000000000..404f51e9479 --- /dev/null +++ b/docs/relnotes/10.5.6.html 
@@ -0,0 +1,146 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 10.5.6 Release Notes / May 23, 2015

    + +

    +Mesa 10.5.6 is a bug fix release which fixes bugs found since the 10.5.5 release. +

    +

    +Mesa 10.5.6 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is only available if requested at context creation +because compatibility contexts are not supported. +

    + + +

    SHA256 checksums

    +
    +TBD
    +
    + + +

    New features

    +

    None

    + +

    Bug fixes

    + +

    This list is likely incomplete.

    + +
      + +
    • Bug 86792 - [NVC0] Portal 2 Crashes in Wine
    • + +
    • Bug 90147 - swrast: build error undeclared _SC_PHYS_PAGES on osx
    • + +
    • Bug 90350 - [G96] Portal's portal are incorrectly rendered
    • + +
    • Bug 90363 - [nv50] HW state is not reset correctly when using a new GL context
    • + +
    + + +

    Changes

    + +

    Alex Deucher (1):

    +
      +
    • radeonsi: add new bonaire pci id
    • +
    + +

    Axel Davy (2):

    +
      +
    • egl/wayland: properly destroy wayland objects
    • +
    • glx/dri3: Add additional check for gpu offloading case
    • +
    + +

    Emil Velikov (4):

    +
      +
    • docs: Add sha256 sums for the 10.5.5 release
    • +
    • egl/main: fix EGL_KHR_get_all_proc_addresses
    • +
    • targets/osmesa: drop the -module tag from LDFLAGS
    • +
    • Update version to 10.5.6
    • +
    + +

    Francisco Jerez (4):

    +
      +
    • clover: Refactor event::trigger and ::abort to prevent deadlock and reentrancy issues.
    • +
    • clover: Wrap event::_status in a method to prevent unlocked access.
    • +
    • clover: Implement locking of the wait_count, _chain and _status members of event.
    • +
    • i965: Fix PBO cache coherency issue after _mesa_meta_pbo_GetTexSubImage().
    • +
    + +

    Fredrik Höglund (2):

    +
      +
    • main: Require that the texture exists in framebuffer_texture
    • +
    • mesa: Generate GL_INVALID_VALUE in framebuffer_texture when layer < 0
    • +
    + +

    Ilia Mirkin (7):

    +
      +
    • nv50/ir: only propagate saturate up if some actual folding took place
    • +
    • nv50: keep track of PGRAPH state in nv50_screen
    • +
    • nvc0: keep track of PGRAPH state in nvc0_screen
    • +
    • nvc0: reset the instanced elements state when doing blit using 3d engine
    • +
    • nv50/ir: only enable mul saturate on G200+
    • +
    • st/mesa: make sure to create a "clean" bool when doing i2b
    • +
    • nvc0: switch mechanism for shader eviction to be a while loop
    • +
    + +

    Jeremy Huddleston Sequoia (2):

    +
      +
    • swrast: Build fix for darwin
    • +
    • darwin: Fix install name of libOSMesa
    • +
    + +

    Laura Ekstrand (2):

    +
      +
    • main: Fix an error generated by FramebufferTexture
    • +
    • main: Complete error conditions for glInvalidate*Framebuffer.
    • +
    + +

    Marta Lofstedt (1):

    +
      +
    • main: glGetIntegeri_v fails for GL_VERTEX_BINDING_STRIDE
    • +
    + +

    Rob Clark (2):

    +
      +
    • freedreno: enable a306
    • +
    • freedreno: fix bug in tile/slot calculation
    • +
    + +

    Roland Scheidegger (1):

    +
      +
    • draw: (trivial) fix out-of-bounds vector initialization
    • +
    + +

    Tim Rowley (1):

    +
      +
    • mesa: fix shininess check for ffvertex_prog v2
    • +
    + +

    Tom Stellard (2):

    +
      +
    • clover: Add a mutex to guard queue::queued_events
    • +
    • clover: Fix a bug with multi-threaded events v2
    • +
    + + +
    + + From 81d5d78573f821fb0983523bbd698d6691c4bb34 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sun, 24 May 2015 10:43:31 +0100 Subject: [PATCH 275/834] docs: Add sha256sums for the 10.5.6 release Signed-off-by: Emil Velikov (cherry picked from commit 8cb28bc49d7799d5accb1feb7e355ec48518e20b) --- docs/relnotes/10.5.6.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/10.5.6.html b/docs/relnotes/10.5.6.html index 404f51e9479..0046b8ff992 100644 --- a/docs/relnotes/10.5.6.html +++ b/docs/relnotes/10.5.6.html @@ -31,7 +31,8 @@ because compatibility contexts are not supported.

    SHA256 checksums

    -TBD
    +89ff9cb08d0f6e3f34154864c3071253057cd21020759457c8ae27e0f70985d3  mesa-10.5.6.tar.gz
    +66017853bde5f7a6647db3eede30512a091a3491daa1708e0ad8027c328ba595  mesa-10.5.6.tar.xz
     
    From 207ae2b0efcdb48a39fd91b05181c7e6d81e5002 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sun, 24 May 2015 10:47:48 +0100 Subject: [PATCH 276/834] docs: add news item and link release notes for mesa 10.5.6 Signed-off-by: Emil Velikov --- docs/index.html | 6 ++++++ docs/relnotes.html | 1 + 2 files changed, 7 insertions(+) diff --git a/docs/index.html b/docs/index.html index 325e554df5b..08f9eb5c1f7 100644 --- a/docs/index.html +++ b/docs/index.html @@ -16,6 +16,12 @@

    News

    +

    May 23, 2015

    +

    +Mesa 10.5.6 is released. +This is a bug-fix release. +

    +

    May 11, 2015

    Mesa 10.5.5 is released. diff --git a/docs/relnotes.html b/docs/relnotes.html index 6ba9e5904be..33a6406fa02 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.

      +
    • 10.5.6 release notes
    • 10.5.5 release notes
    • 10.5.4 release notes
    • 10.5.3 release notes From aba3392541f38f82e3ebde251fdcca78e90adbf3 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Thu, 5 Mar 2015 12:10:15 -0500 Subject: [PATCH 277/834] nv30: avoid doing extra work on clear and hitting unexpected states Clearing can happen at a time when various state objects are incoherent and not ready for a draw. Some of the validation functions don't handle this well, so only flush the framebuffer state. This has the advantage of also not doing extra work. This works around some crashes that can happen when clearing. Signed-off-by: Ilia Mirkin Reviewed-by: Tobias Klausmann --- src/gallium/drivers/nouveau/nv30/nv30_clear.c | 2 +- src/gallium/drivers/nouveau/nv30/nv30_context.h | 2 +- src/gallium/drivers/nouveau/nv30/nv30_draw.c | 4 ++-- src/gallium/drivers/nouveau/nv30/nv30_state_validate.c | 10 ++++++---- src/gallium/drivers/nouveau/nv30/nv30_vbo.c | 2 +- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_clear.c b/src/gallium/drivers/nouveau/nv30/nv30_clear.c index 1ab8929cc38..83fd1fa38dd 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_clear.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_clear.c @@ -58,7 +58,7 @@ nv30_clear(struct pipe_context *pipe, unsigned buffers, struct pipe_framebuffer_state *fb = &nv30->framebuffer; uint32_t colr = 0, zeta = 0, mode = 0; - if (!nv30_state_validate(nv30, TRUE)) + if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, TRUE)) return; if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.h b/src/gallium/drivers/nouveau/nv30/nv30_context.h index 7b32aaee936..592cdbe24f9 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_context.h +++ b/src/gallium/drivers/nouveau/nv30/nv30_context.h @@ -204,7 +204,7 @@ void nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); boolean -nv30_state_validate(struct nv30_context *nv30, boolean hwtnl); 
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl); void nv30_state_release(struct nv30_context *nv30); diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index 13aad7a4e09..74f0d66df95 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -130,7 +130,7 @@ nv30_render_draw_elements(struct vbuf_render *render, NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); } - if (!nv30_state_validate(nv30, FALSE)) + if (!nv30_state_validate(nv30, ~0, FALSE)) return; BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); @@ -175,7 +175,7 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr) NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); } - if (!nv30_state_validate(nv30, FALSE)) + if (!nv30_state_validate(nv30, ~0, FALSE)) return; BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c index 0f9d19dd68e..86ac4f7d6fd 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c @@ -456,7 +456,7 @@ nv30_state_context_switch(struct nv30_context *nv30) } boolean -nv30_state_validate(struct nv30_context *nv30, boolean hwtnl) +nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl) { struct nouveau_screen *screen = &nv30->screen->base; struct nouveau_pushbuf *push = nv30->base.pushbuf; @@ -481,14 +481,16 @@ nv30_state_validate(struct nv30_context *nv30, boolean hwtnl) else validate = swtnl_validate_list; - if (nv30->dirty) { + mask &= nv30->dirty; + + if (mask) { while (validate->func) { - if (nv30->dirty & validate->mask) + if (mask & validate->mask) validate->func(nv30); validate++; } - nv30->dirty = 0; + nv30->dirty &= ~mask; } nouveau_pushbuf_bufctx(push, bctx); diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c 
b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c index 67ab8295218..d4e384b21d2 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c @@ -564,7 +564,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (nv30->vbo_user && !(nv30->dirty & (NV30_NEW_VERTEX | NV30_NEW_ARRAYS))) nv30_update_user_vbufs(nv30); - nv30_state_validate(nv30, TRUE); + nv30_state_validate(nv30, ~0, TRUE); if (nv30->draw_flags) { nv30_render_vbo(pipe, info); return; From 7518fc3c66e9b5703b987bccca7970a344deadfa Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 23 May 2015 20:58:53 -0400 Subject: [PATCH 278/834] nv30: fix clip plane uploads and enable changes nv30_validate_clip depends on the rasterizer state. Also we should upload all the new clip planes on change since next time the plane data won't have changed, but the enables might. This fixes fixed-clip-enables and vs-clip-vertex-enables shader tests. Signed-off-by: Ilia Mirkin Reviewed-by: Tobias Klausmann Cc: "10.5 10.6" --- .../drivers/nouveau/nv30/nv30_state_validate.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c index 86ac4f7d6fd..a954dcce562 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c @@ -272,15 +272,13 @@ nv30_validate_clip(struct nv30_context *nv30) uint32_t clpd_enable = 0; for (i = 0; i < 6; i++) { - if (nv30->rast->pipe.clip_plane_enable & (1 << i)) { - if (nv30->dirty & NV30_NEW_CLIP) { - BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5); - PUSH_DATA (push, i); - PUSH_DATAp(push, nv30->clip.ucp[i], 4); - } - - clpd_enable |= 1 << (1 + 4*i); + if (nv30->dirty & NV30_NEW_CLIP) { + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5); + PUSH_DATA (push, i); + PUSH_DATAp(push, nv30->clip.ucp[i], 4); } + if 
(nv30->rast->pipe.clip_plane_enable & (1 << i)) + clpd_enable |= 2 << (4*i); } BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1); @@ -389,7 +387,7 @@ static struct state_validate hwtnl_validate_list[] = { { nv30_validate_stipple, NV30_NEW_STIPPLE }, { nv30_validate_scissor, NV30_NEW_SCISSOR | NV30_NEW_RASTERIZER }, { nv30_validate_viewport, NV30_NEW_VIEWPORT }, - { nv30_validate_clip, NV30_NEW_CLIP }, + { nv30_validate_clip, NV30_NEW_CLIP | NV30_NEW_RASTERIZER }, { nv30_fragprog_validate, NV30_NEW_FRAGPROG | NV30_NEW_FRAGCONST }, { nv30_vertprog_validate, NV30_NEW_VERTPROG | NV30_NEW_VERTCONST | NV30_NEW_FRAGPROG | NV30_NEW_RASTERIZER }, From 3dec892d9b873f6c8a2a963a7646af90ada361bc Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Mon, 25 May 2015 09:40:01 +0200 Subject: [PATCH 279/834] docs: Mark ARB_shader_storage_buffer_object as in progress Reviewed-by: Chris Forbes --- docs/GL3.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 9d56ee5d67e..44a824b91c1 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -164,7 +164,7 @@ GL 4.3, GLSL 4.30: GL_ARB_program_interface_query DONE (all drivers) GL_ARB_robust_buffer_access_behavior not started GL_ARB_shader_image_size in progress (Martin Peres) - GL_ARB_shader_storage_buffer_object not started + GL_ARB_shader_storage_buffer_object in progress (Iago Toral, Samuel Iglesias) GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe) GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30) @@ -221,7 +221,7 @@ GLES3.1, GLSL ES 3.1 GL_ARB_shader_atomic_counters DONE (i965) GL_ARB_shader_image_load_store in progress (curro) GL_ARB_shader_image_size in progress (Martin Peres) - GL_ARB_shader_storage_buffer_object not started + GL_ARB_shader_storage_buffer_object in progress (Iago Toral, Samuel Iglesias) GL_ARB_shading_language_packing DONE (all 
drivers) GL_ARB_separate_shader_objects DONE (all drivers) GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) From 843ff4ba2af0b19a377a3bf1c9ae7b2b1f2c0e4c Mon Sep 17 00:00:00 2001 From: Tobias Klausmann Date: Mon, 25 May 2015 15:57:09 +0200 Subject: [PATCH 280/834] docs: Mark ARB_cull_distance as in progress Signed-off-by: Tobias Klausmann Reviewed-by: Samuel Pitoiset --- docs/GL3.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 44a824b91c1..8e1c8cd4eef 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -190,7 +190,7 @@ GL 4.5, GLSL 4.50: GL_ARB_ES3_1_compatibility not started GL_ARB_clip_control DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, llvmpipe, softpipe) - GL_ARB_cull_distance not started + GL_ARB_cull_distance in progress (Tobias) GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600) GL_ARB_direct_state_access DONE (all drivers) - Transform Feedback object DONE From cc3d2755577dab8c930f0bccff2756cb92aef8bc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 May 2015 09:27:08 -0700 Subject: [PATCH 281/834] Fix an unused variable warning Trivial. Deleted the 2 unneeded lines. --- src/mesa/program/prog_to_nir.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 3067b2516bf..5ca81e54b7f 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -188,8 +188,6 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) /* This is a negative offset which should be added to the address * register's value. 
*/ - nir_alu_src addr_src = { NIR_SRC_INIT }; - addr_src.src = nir_src_for_reg(c->addr_reg); reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index)); deref_arr->base_offset = 0; From 89585edf3c01c94b62d163adf0209568efa68568 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 24 May 2015 11:56:21 -0400 Subject: [PATCH 282/834] nv30/draw: avoid leaving stale pointers in draw state Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nv30/nv30_draw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index 74f0d66df95..8db15c6c059 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -400,16 +400,16 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) void *map = nv04_resource(nv30->vertprog.constbuf)->data; draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, map, nv30->vertprog.constbuf_nr); + } else { + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0); } } for (i = 0; i < nv30->num_vtxbufs; i++) { const void *map = nv30->vtxbuf[i].user_buffer; if (!map) { - if (!nv30->vtxbuf[i].buffer) { - continue; - } - map = pipe_buffer_map(pipe, nv30->vtxbuf[i].buffer, + if (nv30->vtxbuf[i].buffer) + map = pipe_buffer_map(pipe, nv30->vtxbuf[i].buffer, PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_READ, &transfer[i]); } From 147816375d22a653176ab28ed650fa811ceea83f Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 25 May 2015 14:06:01 -0400 Subject: [PATCH 283/834] nv30/draw: draw expects constbuf size in bytes, not vec4 units This fixes glxgears with NV30_SWTNL=1 forced on. Probably fixes a bunch of other situations where we fall back to the swtnl path. 
Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nv30/nv30_draw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index 8db15c6c059..6a0d06f2ccf 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -399,7 +399,7 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (nv30->vertprog.constbuf) { void *map = nv04_resource(nv30->vertprog.constbuf)->data; draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, - map, nv30->vertprog.constbuf_nr); + map, nv30->vertprog.constbuf_nr * 16); } else { draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0); } From bb973723a5e1f27817b6be2c2fa4fb3ea28e733c Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 17 May 2015 17:32:24 -0400 Subject: [PATCH 284/834] st/mesa: don't leak glsl_to_tgsi object on link failure Signed-off-by: Ilia Mirkin Reviewed-by: Kenneth Graunke Cc: "10.5 10.6" --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f0f2a77d065..719d08145ea 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5549,6 +5549,7 @@ get_mesa_program(struct gl_context *ctx, */ _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); if (!shader_program->LinkStatus) { + free_glsl_to_tgsi_visitor(v); return NULL; } From 6a111e54d7578abee6bce4a75ce1399ed369ab5f Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Mon, 25 May 2015 22:24:05 +0200 Subject: [PATCH 285/834] llvmpipe: (trivial) add parantheses in (!x == y) expression Apparently some compilers think we probably wanted to do !(x == y) instead and issue a warning, so just shut it up... No functional change, obviously. 
Cc: --- src/gallium/drivers/llvmpipe/lp_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index 4f8bab62e7b..fc593670671 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -315,7 +315,7 @@ llvmpipe_check_render_cond(struct llvmpipe_context *lp) b = pipe->get_query_result(pipe, lp->render_cond_query, wait, (void*)&result); if (b) - return (!result == lp->render_cond_cond); + return ((!result) == lp->render_cond_cond); else return TRUE; } From 5646f0f18a620292524eebcd77353ff3d3687eb2 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 17 May 2015 17:56:44 -0400 Subject: [PATCH 286/834] glsl: avoid leaking linked gl_shader when there's a late linker error This makes piglit mixing-clip-distance-and-clip-vertex-disallowed have 0 definitely lost blocks with valgrind. (Same non-0 number of possibly lost blocks though.) Signed-off-by: Ilia Mirkin Reviewed-by: Tobias Klausmann Cc: "10.5 10.6" --- src/glsl/linker.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 9798afefc98..99e0a388bb4 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2829,8 +2829,11 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) link_intrastage_shaders(mem_ctx, ctx, prog, shader_list[stage], num_shaders[stage]); - if (!prog->LinkStatus) + if (!prog->LinkStatus) { + if (sh) + ctx->Driver.DeleteShader(ctx, sh); goto done; + } switch (stage) { case MESA_SHADER_VERTEX: @@ -2843,8 +2846,11 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) validate_fragment_shader_executable(prog, sh); break; } - if (!prog->LinkStatus) + if (!prog->LinkStatus) { + if (sh) + ctx->Driver.DeleteShader(ctx, sh); goto done; + } _mesa_reference_shader(ctx, &prog->_LinkedShaders[stage], sh); } From 3600439897c79d37c3c654546867ddfa0c420743 Mon 
Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 25 May 2015 20:15:09 -0400 Subject: [PATCH 287/834] nv30/draw: fix indexed draws with swtnl path and a resource index buffer The map = assignment was missing. Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nv30/nv30_draw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index 6a0d06f2ccf..340474a0247 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -419,9 +419,9 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (info->indexed) { const void *map = nv30->idxbuf.user_buffer; if (!map) - pipe_buffer_map(pipe, nv30->idxbuf.buffer, - PIPE_TRANSFER_UNSYNCHRONIZED | - PIPE_TRANSFER_READ, &transferi); + map = pipe_buffer_map(pipe, nv30->idxbuf.buffer, + PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_READ, &transferi); draw_set_indexes(draw, (ubyte *) map + nv30->idxbuf.offset, nv30->idxbuf.index_size, ~0); From fdad7dfbdae07b9273fc8f57e63258dbe542c9b5 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 25 May 2015 21:12:46 -0400 Subject: [PATCH 288/834] nv30/draw: only use the DMA1 object (GART) if the bo is not in VRAM Instead of always having it in the data, let the bo placement decide it. This fixes glxgears with swtnl forced on. 
Signed-off-by: Ilia Mirkin Reviewed-by: Ben Skeggs Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nv30/nv30_draw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index 340474a0247..b0557b0d082 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -127,7 +127,7 @@ nv30_render_draw_elements(struct vbuf_render *render, for (i = 0; i < r->vertex_info.num_attribs; i++) { PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP, nv04_resource(r->buffer), r->offset + r->vtxptr[i], - NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); + NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1); } if (!nv30_state_validate(nv30, ~0, FALSE)) @@ -172,7 +172,7 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr) for (i = 0; i < r->vertex_info.num_attribs; i++) { PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP, nv04_resource(r->buffer), r->offset + r->vtxptr[i], - NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); + NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1); } if (!nv30_state_validate(nv30, ~0, FALSE)) @@ -245,7 +245,7 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx) format = draw_translate_vinfo_format(emit); r->vtxfmt[attrib] = nv30_vtxfmt(pscreen, format)->hw; - r->vtxptr[attrib] = vinfo->size | NV30_3D_VTXBUF_DMA1; + r->vtxptr[attrib] = vinfo->size; vinfo->size += draw_translate_vinfo_size(emit); if (nv30_screen(pscreen)->eng3d->oclass < NV40_3D_CLASS) { From c3d36a2e1a87a4aded662db7a5d320ee7ac3a8b5 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 25 May 2015 21:14:13 -0400 Subject: [PATCH 289/834] nv30/draw: allocate vertex buffers in gart These are only used once per draw, so it makes sense to keep them in GART. Also take this opportunity to modernize the buffer mapping API usage. 
Signed-off-by: Ilia Mirkin Reviewed-by: Ben Skeggs Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nv30/nv30_draw.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index b0557b0d082..7ae1a1bd1d4 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -71,12 +71,12 @@ nv30_render_allocate_vertices(struct vbuf_render *render, struct nv30_render *r = nv30_render(render); struct nv30_context *nv30 = r->nv30; - r->length = vertex_size * nr_vertices; + r->length = (uint32_t)vertex_size * (uint32_t)nr_vertices; if (r->offset + r->length >= render->max_vertex_buffer_bytes) { pipe_resource_reference(&r->buffer, NULL); r->buffer = pipe_buffer_create(&nv30->screen->base.base, - PIPE_BIND_VERTEX_BUFFER, 0, + PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM, render->max_vertex_buffer_bytes); if (!r->buffer) return FALSE; @@ -91,10 +91,14 @@ static void * nv30_render_map_vertices(struct vbuf_render *render) { struct nv30_render *r = nv30_render(render); - char *map = pipe_buffer_map(&r->nv30->base.pipe, r->buffer, - PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED, &r->transfer); - return map + r->offset; + char *map = pipe_buffer_map_range( + &r->nv30->base.pipe, r->buffer, + r->offset, r->length, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_DISCARD_RANGE, + &r->transfer); + assert(map); + return map; } static void From 25be70462dbb7ee994e69ffccc3de94e4114e667 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 23 May 2015 22:11:38 -0400 Subject: [PATCH 290/834] nv30/draw: switch varying hookup logic to know about texcoords Commit 8acaf862dfe switched things over to use TEXCOORD instead of GENERIC, but did not update the nv30 swtnl draw paths. This teaches the draw logic about TEXCOORD. Among other things, this fixes a crash in demos/arbocclude when using swtnl. 
Curiously enough, the point-sprite piglit works without this. Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nv30/nv30_draw.c | 25 +++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index 7ae1a1bd1d4..c1665b7ad2f 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -218,22 +218,24 @@ static const struct { [TGSI_SEMANTIC_BCOLOR ] = { EMIT_4F, INTERP_LINEAR , 1, 3, 0x00000004 }, [TGSI_SEMANTIC_FOG ] = { EMIT_4F, INTERP_PERSPECTIVE, 5, 5, 0x00000010 }, [TGSI_SEMANTIC_PSIZE ] = { EMIT_1F_PSIZE, INTERP_POS , 6, 6, 0x00000020 }, - [TGSI_SEMANTIC_GENERIC ] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 } + [TGSI_SEMANTIC_TEXCOORD] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 }, }; static boolean vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx) { - struct pipe_screen *pscreen = &r->nv30->screen->base.base; + struct nv30_screen *screen = r->nv30->screen; struct nv30_fragprog *fp = r->nv30->fragprog.program; struct vertex_info *vinfo = &r->vertex_info; enum pipe_format format; uint emit = EMIT_OMIT; uint result = *idx; - if (sem == TGSI_SEMANTIC_GENERIC && result >= 8) { - for (result = 0; result < 8; result++) { - if (fp->texcoord[result] == *idx) { + if (sem == TGSI_SEMANTIC_GENERIC) { + uint num_texcoords = (screen->eng3d->oclass < NV40_3D_CLASS) ? 
8 : 10; + for (result = 0; result < num_texcoords; result++) { + if (fp->texcoord[result] == *idx + 8) { + sem = TGSI_SEMANTIC_TEXCOORD; emit = vroute[sem].emit; break; } @@ -248,11 +250,11 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx) draw_emit_vertex_attr(vinfo, emit, vroute[sem].interp, attrib); format = draw_translate_vinfo_format(emit); - r->vtxfmt[attrib] = nv30_vtxfmt(pscreen, format)->hw; + r->vtxfmt[attrib] = nv30_vtxfmt(&screen->base.base, format)->hw; r->vtxptr[attrib] = vinfo->size; vinfo->size += draw_translate_vinfo_size(emit); - if (nv30_screen(pscreen)->eng3d->oclass < NV40_3D_CLASS) { + if (screen->eng3d->oclass < NV40_3D_CLASS) { r->vtxprog[attrib][0] = 0x001f38d8; r->vtxprog[attrib][1] = 0x0080001b | (attrib << 9); r->vtxprog[attrib][2] = 0x0836106c; @@ -264,7 +266,12 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx) r->vtxprog[attrib][3] = 0x6041ff80 | (result + vroute[sem].vp40) << 2; } - *idx = vroute[sem].ow40 << result; + if (result < 8) + *idx = vroute[sem].ow40 << result; + else { + assert(sem == TGSI_SEMANTIC_TEXCOORD); + *idx = 0x00001000 << (result - 8); + } return TRUE; } @@ -318,7 +325,7 @@ nv30_render_validate(struct nv30_context *nv30) while (pntc && attrib < 16) { uint index = ffs(pntc) - 1; pntc &= ~(1 << index); - if (vroute_add(r, attrib, TGSI_SEMANTIC_GENERIC, &index)) { + if (vroute_add(r, attrib, TGSI_SEMANTIC_TEXCOORD, &index)) { vp_attribs |= (1 << attrib++); vp_results |= index; } From 3ec18152858fd9aadb398d78d5ad2d2b938507c1 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 25 May 2015 17:46:45 -0400 Subject: [PATCH 291/834] nv30: falling back to draw path for edgeflag does no good The problem is that the EDGEFLAG has to be toggled at vertex submission time. This can be done from either the draw or the regular paths. Avoid falling back to draw just because there's an edgeflag. 
Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c index c8960db4c5b..1ce0589be71 100644 --- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c +++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c @@ -872,9 +872,8 @@ nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc, } break; case TGSI_SEMANTIC_EDGEFLAG: - /* not really an error just a fallback */ - NOUVEAU_ERR("cannot handle edgeflag output\n"); - return FALSE; + vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0); + return TRUE; default: NOUVEAU_ERR("bad output semantic\n"); return FALSE; From 5ae6c7bfce5c9fb91ab6cef2ea74a39af091d5f6 Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Wed, 20 May 2015 19:26:02 +0100 Subject: [PATCH 292/834] i965/skl: Add a message header for the TXF_MCS instruction in vec4vs When using SIMD4x2 on Skylake, the sampler instructions need a message header to select the correct mode. This was added for most sample instructions in 0ac4c2727 but the TXF_MCS instruction is emitted separately and it was missed. 
This fixes a bunch of Piglit tests which test texelFetch in a geometry shader, for example: spec/arb_texture_multisample/texelfetch/2-gs-sampler2dms Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Kenneth Graunke --- .../drivers/dri/i965/brw_vec4_visitor.cpp | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 5a60fe43bf8..59a73a95fc2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2460,11 +2460,27 @@ vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS, dst_reg(this, glsl_type::uvec4_type)); inst->base_mrf = 2; - inst->mlen = 1; inst->src[1] = sampler; + int param_base; + + if (devinfo->gen >= 9) { + /* Gen9+ needs a message header in order to use SIMD4x2 mode */ + vec4_instruction *header_inst = new(mem_ctx) + vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, + dst_reg(MRF, inst->base_mrf)); + + emit(header_inst); + + inst->mlen = 2; + inst->header_size = 1; + param_base = inst->base_mrf + 1; + } else { + inst->mlen = 1; + param_base = inst->base_mrf; + } + /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */ - int param_base = inst->base_mrf; int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1; int zero_mask = 0xf & ~coord_mask; From 967825d053f71c5f5fc3ba31eabc0c6004fde4f1 Mon Sep 17 00:00:00 2001 From: Koop Mast Date: Tue, 26 May 2015 10:24:40 +0200 Subject: [PATCH 293/834] clover: Build fix for FreeBSD. 
Cc: 10.6 10.5 --- src/gallium/state_trackers/clover/core/error.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/state_trackers/clover/core/error.hpp b/src/gallium/state_trackers/clover/core/error.hpp index eb65d629cdd..780b973383a 100644 --- a/src/gallium/state_trackers/clover/core/error.hpp +++ b/src/gallium/state_trackers/clover/core/error.hpp @@ -26,6 +26,7 @@ #include "CL/cl.h" #include +#include namespace clover { class command_queue; From 92c31bb0dd8149d3e5db48b8dec62b242be80d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 18 May 2015 12:34:44 +0200 Subject: [PATCH 294/834] gallium: use const in set_tess_state Reviewed-by: Ilia Mirkin --- src/gallium/drivers/trace/tr_context.c | 4 ++-- src/gallium/include/pipe/p_context.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index a1aeebdf65b..0013c963e7a 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1511,8 +1511,8 @@ static void trace_context_memory_barrier(struct pipe_context *_context, static void trace_context_set_tess_state(struct pipe_context *_context, - float default_outer_level[4], - float default_inner_level[2]) + const float default_outer_level[4], + const float default_inner_level[2]) { struct trace_context *tr_context = trace_context(_context); struct pipe_context *context = tr_context->pipe; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index c25bfa638c1..c2eedf8e7c7 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -232,8 +232,8 @@ struct pipe_context { struct pipe_sampler_view **); void (*set_tess_state)(struct pipe_context *, - float default_outer_level[4], - float default_inner_level[2]); + const float default_outer_level[4], + const float default_inner_level[2]); /** * Bind an array of shader resources 
that will be used by the From 0d84b6cf84971f3378bb95c85f7d39e0c6680b8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 18 May 2015 13:52:30 +0200 Subject: [PATCH 295/834] gallium: rename TGSI tessellation processor types to match pipe shader names I forgot to do this when pushing the interface changes. Reviewed-by: Ilia Mirkin --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 6 +++--- src/gallium/auxiliary/tgsi/tgsi_info.c | 4 ++-- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 12 ++++++------ src/gallium/auxiliary/tgsi/tgsi_scan.c | 8 ++++---- src/gallium/include/pipe/p_shader_tokens.h | 4 ++-- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index c584c2b0001..c80d7a20481 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -286,15 +286,15 @@ iter_declaration( if (decl->Declaration.File == TGSI_FILE_INPUT && (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY || (!patch && - (iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL || - iter->processor.Processor == TGSI_PROCESSOR_TESSEVAL)))) { + (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL || + iter->processor.Processor == TGSI_PROCESSOR_TESS_EVAL)))) { TXT("[]"); } /* all non-patch tess ctrl shader outputs are two dimensional */ if (decl->Declaration.File == TGSI_FILE_OUTPUT && !patch && - iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL) { + iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL) { TXT("[]"); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index eb447cb6557..929531109e5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -302,9 +302,9 @@ tgsi_get_processor_name( uint processor ) return "fragment shader"; case TGSI_PROCESSOR_GEOMETRY: return "geometry shader"; - case TGSI_PROCESSOR_TESSCTRL: + case TGSI_PROCESSOR_TESS_CTRL: return 
"tessellation control shader"; - case TGSI_PROCESSOR_TESSEVAL: + case TGSI_PROCESSOR_TESS_EVAL: return "tessellation evaluation shader"; default: return "unknown shader type!"; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 2ac74fb15d5..be4851f5dcb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -415,8 +415,8 @@ iter_declaration( decl->Semantic.Name == TGSI_SEMANTIC_TESSINNER; if (file == TGSI_FILE_INPUT && !patch && ( processor == TGSI_PROCESSOR_GEOMETRY || - processor == TGSI_PROCESSOR_TESSCTRL || - processor == TGSI_PROCESSOR_TESSEVAL)) { + processor == TGSI_PROCESSOR_TESS_CTRL || + processor == TGSI_PROCESSOR_TESS_EVAL)) { uint vert; for (vert = 0; vert < ctx->implied_array_size; ++vert) { scan_register *reg = MALLOC(sizeof(scan_register)); @@ -424,7 +424,7 @@ iter_declaration( check_and_declare(ctx, reg); } } else if (file == TGSI_FILE_OUTPUT && !patch && - processor == TGSI_PROCESSOR_TESSCTRL) { + processor == TGSI_PROCESSOR_TESS_CTRL) { uint vert; for (vert = 0; vert < ctx->implied_out_array_size; ++vert) { scan_register *reg = MALLOC(sizeof(scan_register)); @@ -489,7 +489,7 @@ iter_property( prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) { ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data); } - if (iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL && + if (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL && prop->Property.PropertyName == TGSI_PROPERTY_TCS_VERTICES_OUT) ctx->implied_out_array_size = prop->u[0].Data; return TRUE; @@ -499,8 +499,8 @@ static boolean prolog(struct tgsi_iterate_context *iter) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - if (iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL || - iter->processor.Processor == TGSI_PROCESSOR_TESSEVAL) + if (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL || + iter->processor.Processor == TGSI_PROCESSOR_TESS_EVAL) 
ctx->implied_array_size = 32; return TRUE; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 3f94bab4496..d821072935a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -74,8 +74,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, assert(procType == TGSI_PROCESSOR_FRAGMENT || procType == TGSI_PROCESSOR_VERTEX || procType == TGSI_PROCESSOR_GEOMETRY || - procType == TGSI_PROCESSOR_TESSCTRL || - procType == TGSI_PROCESSOR_TESSEVAL || + procType == TGSI_PROCESSOR_TESS_CTRL || + procType == TGSI_PROCESSOR_TESS_EVAL || procType == TGSI_PROCESSOR_COMPUTE); info->processor = procType; @@ -239,8 +239,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, if (procType == TGSI_PROCESSOR_VERTEX || procType == TGSI_PROCESSOR_GEOMETRY || - procType == TGSI_PROCESSOR_TESSCTRL || - procType == TGSI_PROCESSOR_TESSEVAL) { + procType == TGSI_PROCESSOR_TESS_CTRL || + procType == TGSI_PROCESSOR_TESS_EVAL) { if (semName == TGSI_SEMANTIC_CLIPDIST) { info->num_written_clipdistance += util_bitcount(fulldecl->Declaration.UsageMask); diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 953bdf6fbbe..bb57e805c29 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -43,8 +43,8 @@ struct tgsi_header #define TGSI_PROCESSOR_FRAGMENT 0 #define TGSI_PROCESSOR_VERTEX 1 #define TGSI_PROCESSOR_GEOMETRY 2 -#define TGSI_PROCESSOR_TESSCTRL 3 -#define TGSI_PROCESSOR_TESSEVAL 4 +#define TGSI_PROCESSOR_TESS_CTRL 3 +#define TGSI_PROCESSOR_TESS_EVAL 4 #define TGSI_PROCESSOR_COMPUTE 5 struct tgsi_processor From c1266f28d6af7788e19634f0d36257e78d1139be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 18 May 2015 02:21:47 +0200 Subject: [PATCH 296/834] tgsi/text: enable parsing tessellation shaders Reviewed-by: Ilia Mirkin --- src/gallium/auxiliary/tgsi/tgsi_text.c | 4 ++++ 1 
file changed, 4 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index a9734db6355..a6675c5168d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -297,6 +297,10 @@ static boolean parse_header( struct translate_ctx *ctx ) processor = TGSI_PROCESSOR_VERTEX; else if (str_match_nocase_whole( &ctx->cur, "GEOM" )) processor = TGSI_PROCESSOR_GEOMETRY; + else if (str_match_nocase_whole( &ctx->cur, "TESS_CTRL" )) + processor = TGSI_PROCESSOR_TESS_CTRL; + else if (str_match_nocase_whole( &ctx->cur, "TESS_EVAL" )) + processor = TGSI_PROCESSOR_TESS_EVAL; else if (str_match_nocase_whole( &ctx->cur, "COMP" )) processor = TGSI_PROCESSOR_COMPUTE; else { From 3d35027fdc383c2bd009f3690b2b160e3b39d58b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 18 May 2015 02:23:04 +0200 Subject: [PATCH 297/834] tgsi/ureg: enable creating tessellation shaders with ureg_create_shader Reviewed-by: Ilia Mirkin --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 7a8bf5404e3..037d31a1643 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -1667,10 +1667,20 @@ void *ureg_create_shader( struct ureg_program *ureg, else memset(&state.stream_output, 0, sizeof(state.stream_output)); - if (ureg->processor == TGSI_PROCESSOR_VERTEX) - return pipe->create_vs_state( pipe, &state ); - else - return pipe->create_fs_state( pipe, &state ); + switch (ureg->processor) { + case TGSI_PROCESSOR_VERTEX: + return pipe->create_vs_state(pipe, &state); + case TGSI_PROCESSOR_TESS_CTRL: + return pipe->create_tcs_state(pipe, &state); + case TGSI_PROCESSOR_TESS_EVAL: + return pipe->create_tes_state(pipe, &state); + case TGSI_PROCESSOR_GEOMETRY: + return pipe->create_gs_state(pipe, 
&state); + case TGSI_PROCESSOR_FRAGMENT: + return pipe->create_fs_state(pipe, &state); + default: + return NULL; + } } From e4339bc9886a26d75b924ad045c3ddd003f802c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 9 May 2015 19:36:17 +0200 Subject: [PATCH 298/834] radeonsi: add support for PIPE_CAP_TGSI_TEXCOORD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without it, texcoords are mapped to GENERIC[0..7], PointCoord is mapped to GENERIC[8], and user-defined varyings start from GENERIC[9]. Since texcoords can only be used between VS and PS, and PointCoord is PS-only, it's silly to always start from GENERIC[9] in all other shaders (such as LS, HS, ES, GS). This adds support for TEXCOORD and PCOORD semantics. As a result, st/mesa will use GENERIC[0] as a base for user-defined varyings, which should make linking ES and GS as well as tessellation shaders at runtime easier. Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/drivers/radeonsi/si_shader.c | 1 + src/gallium/drivers/radeonsi/si_state.c | 20 ++++++++----------- .../drivers/radeonsi/si_state_shaders.c | 5 +++-- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 9d60ef1db38..b57aa5ff01b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -251,6 +251,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_TGSI_TEXCOORD: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: @@ -286,7 +287,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: case PIPE_CAP_USER_VERTEX_BUFFERS: - case 
PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 89f02ab0410..5c2225277fd 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1183,6 +1183,7 @@ handle_semantic: continue; case TGSI_SEMANTIC_PRIMID: case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_TEXCOORD: case TGSI_SEMANTIC_GENERIC: target = V_008DFC_SQ_EXP_PARAM + param_count; shader->vs_output_param_offset[i] = param_count; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 7f0fdd599dc..d1b3ca2481a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -636,18 +636,14 @@ static void *si_create_rs_state(struct pipe_context *ctx, rs->offset_units = state->offset_units; rs->offset_scale = state->offset_scale * 12.0f; - tmp = S_0286D4_FLAT_SHADE_ENA(1); - if (state->sprite_coord_enable) { - tmp |= S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | - S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | - S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | - S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); - } - } - si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp); + si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, + S_0286D4_FLAT_SHADE_ENA(1) | + S_0286D4_PNT_SPRITE_ENA(1) | + S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | + S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | + S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | + S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | + S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); /* point size 
12.4 fixed point */ tmp = (unsigned)(state->point_size * 8.0); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 1bbc6b3ca7a..5974e77e374 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -661,8 +661,9 @@ bcolor: (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade)) tmp |= S_028644_FLAT_SHADE(1); - if (name == TGSI_SEMANTIC_GENERIC && - sctx->sprite_coord_enable & (1 << index)) { + if (name == TGSI_SEMANTIC_PCOORD || + (name == TGSI_SEMANTIC_TEXCOORD && + sctx->sprite_coord_enable & (1 << index))) { tmp |= S_028644_PT_SPRITE_TEX(1); } From af4b9c7c2e119d373f9684119b833a8b62cbb756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 18 May 2015 14:50:19 +0200 Subject: [PATCH 299/834] radeonsi: don't count special outputs for the VS export count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_state_shaders.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 5974e77e374..25811ab413d 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -182,8 +182,13 @@ static void si_shader_vs(struct si_shader *shader) for (nparams = 0, i = 0 ; i < info->num_outputs; i++) { switch (info->output_semantic_name[i]) { case TGSI_SEMANTIC_CLIPVERTEX: + case TGSI_SEMANTIC_CLIPDIST: + case TGSI_SEMANTIC_CULLDIST: case TGSI_SEMANTIC_POSITION: case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_EDGEFLAG: + case TGSI_SEMANTIC_VIEWPORT_INDEX: + case TGSI_SEMANTIC_LAYER: break; default: nparams++; From f41517242aaad3cb67fb7f6c9a03dc34198ca500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 May 2015 18:03:47 +0200 Subject: [PATCH 
300/834] radeonsi: remove unused cases from si_shader_io_get_unique_index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These can't occur between VS and GS, because GS is only supported in the core profile. Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_shader.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5c2225277fd..47e5f96cbed 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -128,21 +128,10 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index) case TGSI_SEMANTIC_CLIPDIST: assert(index <= 1); return 2 + index; - case TGSI_SEMANTIC_CLIPVERTEX: - return 4; - case TGSI_SEMANTIC_COLOR: - assert(index <= 1); - return 5 + index; - case TGSI_SEMANTIC_BCOLOR: - assert(index <= 1); - return 7 + index; - case TGSI_SEMANTIC_FOG: - return 9; - case TGSI_SEMANTIC_EDGEFLAG: - return 10; case TGSI_SEMANTIC_GENERIC: - assert(index <= 63-11); - return 11 + index; + assert(index <= 63-4); + return 4 + index; + default: assert(0); return 63; From fa7f606e89dc4447f07fec0b84d396a4ff25ee7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 18 May 2015 14:56:34 +0200 Subject: [PATCH 301/834] radeonsi: fix scratch buffer setup for geometry shaders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: 10.6 Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_state_shaders.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 25811ab413d..610af948f12 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -841,8 +841,15 @@ static void 
si_update_spi_tmpring_size(struct si_context *sctx) si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4); if (si_update_scratch_buffer(sctx, sctx->gs_shader)) si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4); - if (si_update_scratch_buffer(sctx, sctx->vs_shader)) - si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4); + + /* VS can be bound as ES or VS. */ + if (sctx->gs_shader) { + if (si_update_scratch_buffer(sctx, sctx->vs_shader)) + si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4); + } else { + if (si_update_scratch_buffer(sctx, sctx->vs_shader)) + si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4); + } } /* The LLVM shader backend should be reporting aligned scratch_sizes. */ From 0c5a309cee868cd6e3870f439f560f5f32eb7c40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 22 Feb 2015 15:21:59 +0100 Subject: [PATCH 302/834] radeonsi: use a switch statement in si_shader_selector_key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- .../drivers/radeonsi/si_state_shaders.c | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 610af948f12..e037ce41468 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -356,21 +356,25 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx, union si_shader_key *key) { struct si_context *sctx = (struct si_context *)ctx; + unsigned i; + memset(key, 0, sizeof(*key)); - if (sel->type == PIPE_SHADER_VERTEX) { - unsigned i; - if (!sctx->vertex_elements) - return; - - for (i = 0; i < sctx->vertex_elements->count; ++i) - key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor; + switch (sel->type) { + case PIPE_SHADER_VERTEX: + if (sctx->vertex_elements) + for (i = 
0; i < sctx->vertex_elements->count; ++i) + key->vs.instance_divisors[i] = + sctx->vertex_elements->elements[i].instance_divisor; if (sctx->gs_shader) { key->vs.as_es = 1; key->vs.gs_used_inputs = sctx->gs_shader->gs_used_inputs; } - } else if (sel->type == PIPE_SHADER_FRAGMENT) { + break; + case PIPE_SHADER_GEOMETRY: + break; + case PIPE_SHADER_FRAGMENT: { struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) @@ -398,11 +402,14 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx, } key->ps.alpha_func = PIPE_FUNC_ALWAYS; - /* Alpha-test should be disabled if colorbuffer 0 is integer. */ if (sctx->queued.named.dsa && !sctx->framebuffer.cb0_is_integer) key->ps.alpha_func = sctx->queued.named.dsa->alpha_func; + break; + } + default: + assert(0); } } From 224a77cc60cc0e7f8a14e35ebca6e42544af39b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 22 Feb 2015 15:38:21 +0100 Subject: [PATCH 303/834] radeonsi: use a switch statement in si_delete_shader_selector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- .../drivers/radeonsi/si_state_shaders.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index e037ce41468..208c8523ef1 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -592,15 +592,22 @@ static void si_delete_shader_selector(struct pipe_context *ctx, while (p) { c = p->next_variant; - if (sel->type == PIPE_SHADER_GEOMETRY) { + switch (sel->type) { + case PIPE_SHADER_VERTEX: + if (p->key.vs.as_es) + si_pm4_delete_state(sctx, es, p->pm4); + else + si_pm4_delete_state(sctx, vs, p->pm4); + break; + case PIPE_SHADER_GEOMETRY: si_pm4_delete_state(sctx, gs, p->pm4); 
si_pm4_delete_state(sctx, vs, p->gs_copy_shader->pm4); - } else if (sel->type == PIPE_SHADER_FRAGMENT) + break; + case PIPE_SHADER_FRAGMENT: si_pm4_delete_state(sctx, ps, p->pm4); - else if (p->key.vs.as_es) - si_pm4_delete_state(sctx, es, p->pm4); - else - si_pm4_delete_state(sctx, vs, p->pm4); + break; + } + si_shader_destroy(ctx, p); free(p); p = c; From b787f48ed2a7e1855100afd943ae6b407abb401f Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Tue, 26 May 2015 11:01:57 +0100 Subject: [PATCH 304/834] glapi: Avoid argparse type argument for API XML input files. argparse type is a nice type saver for simple data types, but it doesn't look like a good fit for the input XML file: - Certain implementations of argparse (particularly python 2.7.3's) invoke the type constructor for the default argument even when an option is passed in the command line, causing `No such file or directory: 'gl_API.xml'` when the current dir is not src/mapi/glapi/gen. - The parser takes multiple arguments. This is currently worked around using lambdas, but that is unnecessarily complex and hard to read. Furthermore it's odd to have a side-effect as heavy as parsing XML happening deep inside the argument parsing. 
https://bugs.freedesktop.org/show_bug.cgi?id=90600 Reviewed-by: Brian Paul --- src/mapi/glapi/gen/gl_procs.py | 7 +++---- src/mapi/glapi/gen/gl_table.py | 9 +++++---- src/mapi/glapi/gen/remap_helper.py | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/mapi/glapi/gen/gl_procs.py b/src/mapi/glapi/gen/gl_procs.py index cf6d2de8a4b..685e2fac345 100644 --- a/src/mapi/glapi/gen/gl_procs.py +++ b/src/mapi/glapi/gen/gl_procs.py @@ -165,14 +165,12 @@ typedef struct { def _parser(): """Parse arguments and return a namepsace.""" - api_type = lambda x: gl_XML.parse_GL_API(x, glX_XML.glx_item_factory()) parser = argparse.ArgumentParser() parser.add_argument('-f', '--filename', - type=api_type, default='gl_API.xml', metavar="input_file_name", - dest='api', + dest='file_name', help="Path to an XML description of OpenGL API.") parser.add_argument('-c', '--es-version', dest='es', @@ -184,7 +182,8 @@ def _parser(): def main(): """Main function.""" args = _parser() - PrintGlProcs(args.es).Print(args.api) + api = gl_XML.parse_GL_API(args.file_name, glX_XML.glx_item_factory()) + PrintGlProcs(args.es).Print(api) if __name__ == '__main__': diff --git a/src/mapi/glapi/gen/gl_table.py b/src/mapi/glapi/gen/gl_table.py index 30903fd8f60..3f029023087 100644 --- a/src/mapi/glapi/gen/gl_table.py +++ b/src/mapi/glapi/gen/gl_table.py @@ -206,10 +206,9 @@ def _parser(): """Parse arguments and return a namespace.""" parser = argparse.ArgumentParser() parser.add_argument('-f', '--filename', - type=gl_XML.parse_GL_API, default='gl_API.xml', metavar="input_file_name", - dest='api', + dest='file_name', help="Path to an XML description of OpenGL API.") parser.add_argument('-m', '--mode', choices=['table', 'remap_table'], @@ -229,15 +228,17 @@ def main(): """Main function.""" args = _parser() + api = gl_XML.parse_GL_API(args.file_name) + if args.mode == "table": printer = PrintGlTable(args.es) elif args.mode == "remap_table": printer = PrintRemapTable(args.es) if args.es is 
not None: - args.api.filter_functions_by_api(args.es) + api.filter_functions_by_api(args.es) - printer.Print(args.api) + printer.Print(api) if __name__ == '__main__': diff --git a/src/mapi/glapi/gen/remap_helper.py b/src/mapi/glapi/gen/remap_helper.py index 9e3c3908d8c..94ae1936d21 100644 --- a/src/mapi/glapi/gen/remap_helper.py +++ b/src/mapi/glapi/gen/remap_helper.py @@ -170,10 +170,9 @@ def _parser(): """Parse input options and return a namsepace.""" parser = argparse.ArgumentParser() parser.add_argument('-f', '--filename', - type=gl_XML.parse_GL_API, default="gl_API.xml", metavar="input_file_name", - dest='api', + dest='file_name', help="An xml description file.") parser.add_argument('-c', '--es-version', choices=[None, 'es1', 'es2'], @@ -188,11 +187,12 @@ def main(): """Main function.""" args = _parser() + api = gl_XML.parse_GL_API(args.file_name) if args.es is not None: - args.api.filter_functions_by_api(args.es) + api.filter_functions_by_api(args.es) printer = PrintGlRemap() - printer.Print(args.api) + printer.Print(api) if __name__ == '__main__': From 09eabf5be68b901999ef15733a22dfcb82dfec5f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 May 2015 16:39:32 -0700 Subject: [PATCH 305/834] mesa: add const qualifier on _mesa_is_compressed_format() Reviewed-by: Topi Pohjolainen --- src/mesa/main/glformats.c | 2 +- src/mesa/main/glformats.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index 8ced5794938..6a77c916a9c 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -1200,7 +1200,7 @@ _mesa_is_depth_or_stencil_format(GLenum format) * \return GL_TRUE if compressed, GL_FALSE if uncompressed */ GLboolean -_mesa_is_compressed_format(struct gl_context *ctx, GLenum format) +_mesa_is_compressed_format(const struct gl_context *ctx, GLenum format) { switch (format) { case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: diff --git a/src/mesa/main/glformats.h 
b/src/mesa/main/glformats.h index e1ecd64d5f9..8881cb7d86b 100644 --- a/src/mesa/main/glformats.h +++ b/src/mesa/main/glformats.h @@ -96,7 +96,7 @@ extern GLboolean _mesa_is_depth_or_stencil_format(GLenum format); extern GLboolean -_mesa_is_compressed_format(struct gl_context *ctx, GLenum format); +_mesa_is_compressed_format(const struct gl_context *ctx, GLenum format); extern GLenum _mesa_base_format_to_integer_format(GLenum format); From 3ddd1cf7d128018639de7e4c8bc17896233ebdb7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 May 2015 16:42:21 -0700 Subject: [PATCH 306/834] mesa: const qualify, return bool for _mesa_texture_view_compatible_format() Reviewed-by: Topi Pohjolainen --- src/mesa/main/textureview.c | 12 ++++++------ src/mesa/main/textureview.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c index cd87a27d2db..a1cbec5b365 100644 --- a/src/mesa/main/textureview.c +++ b/src/mesa/main/textureview.c @@ -167,7 +167,7 @@ static const struct internal_format_class_info s3tc_compatible_internal_formats[ * \return VIEW_CLASS if internalformat found in table, false otherwise. */ static GLenum -lookup_view_class(struct gl_context *ctx, GLenum internalformat) +lookup_view_class(const struct gl_context *ctx, GLenum internalformat) { GLuint i; @@ -320,8 +320,8 @@ target_valid(struct gl_context *ctx, GLenum origTarget, GLenum newTarget) * If an error is found, record it with _mesa_error() * \return false if any error, true otherwise. */ -GLboolean -_mesa_texture_view_compatible_format(struct gl_context *ctx, +bool +_mesa_texture_view_compatible_format(const struct gl_context *ctx, GLenum origInternalFormat, GLenum newInternalFormat) { @@ -334,14 +334,14 @@ _mesa_texture_view_compatible_format(struct gl_context *ctx, * or an INVALID_OPERATION error is generated. 
*/ if (origInternalFormat == newInternalFormat) - return GL_TRUE; + return true; origViewClass = lookup_view_class(ctx, origInternalFormat); newViewClass = lookup_view_class(ctx, newInternalFormat); if ((origViewClass == newViewClass) && origViewClass != false) - return GL_TRUE; + return true; - return GL_FALSE; + return false; } /** * Helper function for TexStorage and teximagemultisample to set immutable diff --git a/src/mesa/main/textureview.h b/src/mesa/main/textureview.h index 549a13cd809..596a3a8dbbe 100644 --- a/src/mesa/main/textureview.h +++ b/src/mesa/main/textureview.h @@ -29,8 +29,8 @@ #ifndef TEXTUREVIEW_H #define TEXTUREVIEW_H -GLboolean -_mesa_texture_view_compatible_format(struct gl_context *ctx, +bool +_mesa_texture_view_compatible_format(const struct gl_context *ctx, GLenum origInternalFormat, GLenum newInternalFormat); From 8369675a55ab300a84b3a82632042a33883ca255 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 May 2015 17:20:57 -0700 Subject: [PATCH 307/834] mesa: code clean-ups in textureview.[ch] Reviewed-by: Topi Pohjolainen --- src/mesa/main/textureview.c | 83 +++++++++++++++++++++++-------------- src/mesa/main/textureview.h | 5 ++- 2 files changed, 55 insertions(+), 33 deletions(-) diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c index a1cbec5b365..6b0aed4ea1a 100644 --- a/src/mesa/main/textureview.c +++ b/src/mesa/main/textureview.c @@ -176,9 +176,11 @@ lookup_view_class(const struct gl_context *ctx, GLenum internalformat) return compatible_internal_formats[i].view_class; } - if (ctx->Extensions.EXT_texture_compression_s3tc && ctx->Extensions.EXT_texture_sRGB) { + if (ctx->Extensions.EXT_texture_compression_s3tc && + ctx->Extensions.EXT_texture_sRGB) { for (i = 0; i < ARRAY_SIZE(s3tc_compatible_internal_formats); i++) { - if (s3tc_compatible_internal_formats[i].internal_format == internalformat) + if (s3tc_compatible_internal_formats[i].internal_format + == internalformat) return 
s3tc_compatible_internal_formats[i].view_class; } } @@ -226,7 +228,8 @@ initialize_texture_fields(struct gl_context *ctx, 0, internalFormat, texFormat); } - _mesa_next_mipmap_level_size(target, 0, levelWidth, levelHeight, levelDepth, + _mesa_next_mipmap_level_size(target, 0, + levelWidth, levelHeight, levelDepth, &levelWidth, &levelHeight, &levelDepth); } @@ -343,6 +346,7 @@ _mesa_texture_view_compatible_format(const struct gl_context *ctx, return false; } + /** * Helper function for TexStorage and teximagemultisample to set immutable * texture state needed by ARB_texture_view. @@ -357,17 +361,19 @@ _mesa_set_texture_view_state(struct gl_context *ctx, /* Get a reference to what will become this View's base level */ texImage = _mesa_select_tex_image(texObj, target, 0); - /* When an immutable texture is created via glTexStorage or glTexImageMultisample, + /* When an immutable texture is created via glTexStorage or + * glTexImageMultisample, * TEXTURE_IMMUTABLE_FORMAT becomes TRUE. * TEXTURE_IMMUTABLE_LEVELS and TEXTURE_VIEW_NUM_LEVELS become levels. * If the texture target is TEXTURE_1D_ARRAY then * TEXTURE_VIEW_NUM_LAYERS becomes height. * If the texture target is TEXTURE_2D_ARRAY, TEXTURE_CUBE_MAP_ARRAY, - * or TEXTURE_2D_MULTISAMPLE_ARRAY then TEXTURE_VIEW_NUM_LAYERS becomes depth. + * or TEXTURE_2D_MULTISAMPLE_ARRAY then TEXTURE_VIEW_NUM_LAYERS becomes + * depth. * If the texture target is TEXTURE_CUBE_MAP, then * TEXTURE_VIEW_NUM_LAYERS becomes 6. * For any other texture target, TEXTURE_VIEW_NUM_LAYERS becomes 1. - * + * * ARB_texture_multisample: Multisample textures do * not have multiple image levels. 
*/ @@ -401,7 +407,6 @@ _mesa_set_texture_view_state(struct gl_context *ctx, case GL_TEXTURE_CUBE_MAP: texObj->NumLayers = 6; break; - } } @@ -435,16 +440,20 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, minlevel, numlevels, minlayer, numlayers); if (origtexture == 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)", origtexture); + _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)", + origtexture); return; } /* Need original texture information to validate arguments */ origTexObj = _mesa_lookup_texture(ctx, origtexture); - /* If is not the name of a texture, INVALID_VALUE is generated. */ + /* If is not the name of a texture, INVALID_VALUE + * is generated. + */ if (!origTexObj) { - _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)", origtexture); + _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)", + origtexture); return; } @@ -452,7 +461,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, * INVALID_OPERATION is generated. */ if (!origTexObj->Immutable) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(origtexture not immutable)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(origtexture not immutable)"); return; } @@ -467,7 +477,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, */ texObj = _mesa_lookup_texture(ctx, texture); if (texObj == NULL) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(texture = %u non-gen name)", texture); + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(texture = %u non-gen name)", texture); return; } @@ -475,7 +486,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, * the error INVALID_OPERATION is generated. 
*/ if (texObj->Target) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(texture = %u already bound)", texture); + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(texture = %u already bound)", texture); return; } @@ -484,33 +496,35 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, return; /* error was recorded */ } - /* minlevel and minlayer are relative to the view of origtexture + /* minlevel and minlayer are relative to the view of origtexture. * If minlevel or minlayer is greater than level or layer, respectively, - * of origtexture return INVALID_VALUE. + * return INVALID_VALUE. */ newViewMinLevel = origTexObj->MinLevel + minlevel; newViewMinLayer = origTexObj->MinLayer + minlayer; if (newViewMinLevel >= (origTexObj->MinLevel + origTexObj->NumLevels)) { _mesa_error(ctx, GL_INVALID_VALUE, - "glTextureView(new minlevel (%d) > orig minlevel (%d) + orig numlevels (%d))", + "glTextureView(new minlevel (%d) > orig minlevel (%d)" + " + orig numlevels (%d))", newViewMinLevel, origTexObj->MinLevel, origTexObj->NumLevels); return; } if (newViewMinLayer >= (origTexObj->MinLayer + origTexObj->NumLayers)) { _mesa_error(ctx, GL_INVALID_VALUE, - "glTextureView(new minlayer (%d) > orig minlayer (%d) + orig numlayers (%d))", + "glTextureView(new minlayer (%d) > orig minlayer (%d)" + " + orig numlayers (%d))", newViewMinLayer, origTexObj->MinLayer, origTexObj->NumLayers); return; } if (!_mesa_texture_view_compatible_format(ctx, - origTexObj->Image[0][0]->InternalFormat, - internalformat)) { + origTexObj->Image[0][0]->InternalFormat, + internalformat)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureView(internalformat %s not compatible with origtexture %s)", - _mesa_lookup_enum_by_nr(internalformat), - _mesa_lookup_enum_by_nr(origTexObj->Image[0][0]->InternalFormat)); + "glTextureView(internalformat %s not compatible with origtexture %s)", + _mesa_lookup_enum_by_nr(internalformat), + 
_mesa_lookup_enum_by_nr(origTexObj->Image[0][0]->InternalFormat)); return; } @@ -569,14 +583,16 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, dimensionsOK = _mesa_legal_texture_dimensions(ctx, target, 0, width, height, depth, 0); if (!dimensionsOK) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(invalid width or height or depth)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(invalid width or height or depth)"); return; } sizeOK = ctx->Driver.TestProxyTexImage(ctx, target, 0, texFormat, width, height, depth, 0); if (!sizeOK) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(invalid texture size)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(invalid texture size)"); return; } @@ -591,17 +607,19 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, case GL_TEXTURE_RECTANGLE: case GL_TEXTURE_2D_MULTISAMPLE: if (numlayers != 1) { - _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(numlayers %d != 1)", numlayers); + _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(numlayers %d != 1)", + numlayers); return; } break; case GL_TEXTURE_CUBE_MAP: - /* If the new texture's target is TEXTURE_CUBE_MAP, the clamped - * must be equal to 6. + /* If the new texture's target is TEXTURE_CUBE_MAP, the clamped + * must be equal to 6. 
*/ if (newViewNumLayers != 6) { - _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(clamped numlayers %d != 6)", + _mesa_error(ctx, GL_INVALID_VALUE, + "glTextureView(clamped numlayers %d != 6)", newViewNumLayers); return; } @@ -615,7 +633,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, */ if ((newViewNumLayers % 6) != 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glTextureView(clamped numlayers %d is not a multiple of 6)", + "glTextureView(clamped numlayers %d is not" + " a multiple of 6)", newViewNumLayers); return; } @@ -628,7 +647,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, */ if ((target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_CUBE_MAP_ARRAY) && (origTexImage->Width != origTexImage->Height)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(origtexture width (%d) != height (%d))", + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(origtexture width (%d) != height (%d))", origTexImage->Width, origTexImage->Height); return; } @@ -662,7 +682,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, texObj->ImmutableLevels = origTexObj->ImmutableLevels; texObj->Target = target; - if (ctx->Driver.TextureView != NULL && !ctx->Driver.TextureView(ctx, texObj, origTexObj)) { + if (ctx->Driver.TextureView != NULL && + !ctx->Driver.TextureView(ctx, texObj, origTexObj)) { return; /* driver recorded error */ } } diff --git a/src/mesa/main/textureview.h b/src/mesa/main/textureview.h index 596a3a8dbbe..59e24b68dd0 100644 --- a/src/mesa/main/textureview.h +++ b/src/mesa/main/textureview.h @@ -41,7 +41,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, GLuint minlayer, GLuint numlayers); extern void -_mesa_set_texture_view_state(struct gl_context *ctx, struct gl_texture_object *texObj, - GLenum target, GLuint levels); +_mesa_set_texture_view_state(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLuint levels); #endif /* TEXTUREVIEW_H */ 
From 0b76541ce0cc34020ef1057a17149cbf9cb3dbe1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 May 2015 17:22:47 -0700 Subject: [PATCH 308/834] mesa: move decls, add const qualifiers in copyimage.c Reviewed-by: Topi Pohjolainen --- src/mesa/main/copyimage.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c index fd22f28892c..789f9a67967 100644 --- a/src/mesa/main/copyimage.c +++ b/src/mesa/main/copyimage.c @@ -46,8 +46,6 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level, struct gl_texture_image **tex_image, GLuint *tmp_tex, const char *dbg_prefix) { - struct gl_renderbuffer *rb; - if (name == 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glCopyImageSubData(%sName = %d)", dbg_prefix, name); @@ -87,7 +85,7 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level, } if (*target == GL_RENDERBUFFER) { - rb = _mesa_lookup_renderbuffer(ctx, name); + struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name); if (!rb) { _mesa_error(ctx, GL_INVALID_VALUE, "glCopyImageSubData(%sName = %u)", dbg_prefix, name); @@ -170,7 +168,8 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level, } static bool -check_region_bounds(struct gl_context *ctx, struct gl_texture_image *tex_image, +check_region_bounds(struct gl_context *ctx, + const struct gl_texture_image *tex_image, int x, int y, int z, int width, int height, int depth, const char *dbg_prefix) { @@ -260,7 +259,7 @@ check_region_bounds(struct gl_context *ctx, struct gl_texture_image *tex_image, } static bool -compressed_format_compatible(struct gl_context *ctx, +compressed_format_compatible(const struct gl_context *ctx, GLenum compressedFormat, GLenum otherFormat) { enum mesa_block_class compressedClass, otherClass; @@ -348,8 +347,8 @@ compressed_format_compatible(struct gl_context *ctx, } static bool -copy_format_compatible(struct gl_context *ctx, - 
GLenum srcFormat, GLenum dstFormat) +copy_format_compatible(const struct gl_context *ctx, + GLenum srcFormat, GLenum dstFormat) { /* * From ARB_copy_image spec: @@ -389,7 +388,7 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel, struct gl_texture_object *srcTexObj, *dstTexObj; struct gl_texture_image *srcTexImage, *dstTexImage; GLuint src_bw, src_bh, dst_bw, dst_bh; - int i, srcNewZ, dstNewZ; + int i; if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glCopyImageSubData(%u, %s, %d, %d, %d, %d, " @@ -447,6 +446,8 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel, } for (i = 0; i < srcDepth; ++i) { + int srcNewZ, dstNewZ; + if (srcTexObj->Target == GL_TEXTURE_CUBE_MAP) { srcTexImage = srcTexObj->Image[i + srcZ][srcLevel]; srcNewZ = 0; From dce53a7d2453c0b2b69a345340455866e75f0a8d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 May 2015 17:26:12 -0700 Subject: [PATCH 309/834] mesa: add some comments in copyimage.c Reviewed-by: Topi Pohjolainen --- src/mesa/main/copyimage.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c index 789f9a67967..e8732c6175b 100644 --- a/src/mesa/main/copyimage.c +++ b/src/mesa/main/copyimage.c @@ -40,6 +40,19 @@ enum mesa_block_class { BLOCK_CLASS_64_BITS }; +/** + * Prepare the source or destination resource, including: + * - Error checking + * - Creating texture wrappers for renderbuffers + * \param name the texture or renderbuffer name + * \param target GL_TEXTURE target or GL_RENDERBUFFER. For the later, will + * be changed to a compatible GL_TEXTURE target. 
+ * \param level mipmap level + * \param tex_obj returns a pointer to a texture object + * \param tex_image returns a pointer to a texture image + * \param tmp_tex returns temporary texture object name + * \return true if success, false if error + */ static bool prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level, struct gl_texture_object **tex_obj, @@ -167,6 +180,12 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level, return true; } + +/** + * Check that the x,y,z,width,height,region is within the texture image + * dimensions. + * \return true if bounds OK, false if regions is out of bounds + */ static bool check_region_bounds(struct gl_context *ctx, const struct gl_texture_image *tex_image, @@ -187,6 +206,7 @@ check_region_bounds(struct gl_context *ctx, return false; } + /* Check X direction */ if (x + width > tex_image->Width) { _mesa_error(ctx, GL_INVALID_VALUE, "glCopyImageSubData(%sX or %sWidth exceeds image bounds)", @@ -194,6 +214,7 @@ check_region_bounds(struct gl_context *ctx, return false; } + /* Check Y direction */ switch (tex_image->TexObject->Target) { case GL_TEXTURE_1D: case GL_TEXTURE_1D_ARRAY: @@ -214,6 +235,7 @@ check_region_bounds(struct gl_context *ctx, break; } + /* Check Z direction */ switch (tex_image->TexObject->Target) { case GL_TEXTURE_1D: case GL_TEXTURE_2D: From eec904d29c0d996fb05f24771a2fdd33e152f519 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 May 2015 13:39:03 -0700 Subject: [PATCH 310/834] xlib: fix X_GLXCreateContextAtrribs/Attribs typo In case the glproto.h file isn't up to date, we provide the #define for X_GLXCreateContextAttribsARB. v2: fix other occurances, improve #ifndef test, per Jose. 
Reviewed-by: Jose Fonseca --- src/gallium/state_trackers/glx/xlib/glx_api.c | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c index 0508255c4be..0456d44104e 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_api.c +++ b/src/gallium/state_trackers/glx/xlib/glx_api.c @@ -40,6 +40,13 @@ #include "xm_api.h" +/* An "Atrribs/Attribs" typo was fixed in glxproto.h in Nov 2014. + * This is in case we don't have the updated header. + */ +#if !defined(X_GLXCreateContextAttribsARB) && \ + defined(X_GLXCreateContextAtrribsARB) +#define X_GLXCreateContextAttribsARB X_GLXCreateContextAtrribsARB +#endif /* This indicates the client-side GLX API and GLX encoder version. */ #define CLIENT_MAJOR_VERSION 1 @@ -2168,7 +2175,7 @@ glXQueryDrawable(Display *dpy, GLXDrawable draw, int attribute, #endif default: - generate_error(dpy, BadValue, 0, X_GLXCreateContextAtrribsARB, true); + generate_error(dpy, BadValue, 0, X_GLXCreateContextAttribsARB, true); return; } } @@ -2762,14 +2769,14 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config, break; default: /* bad attribute */ - generate_error(dpy, BadValue, 0, X_GLXCreateContextAtrribsARB, True); + generate_error(dpy, BadValue, 0, X_GLXCreateContextAttribsARB, True); return NULL; } } /* check contextFlags */ if (contextFlags & ~contextFlagsAll) { - generate_error(dpy, BadValue, 0, X_GLXCreateContextAtrribsARB, True); + generate_error(dpy, BadValue, 0, X_GLXCreateContextAttribsARB, True); return NULL; } @@ -2777,14 +2784,14 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config, if (profileMask != GLX_CONTEXT_CORE_PROFILE_BIT_ARB && profileMask != GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB && profileMask != GLX_CONTEXT_ES_PROFILE_BIT_EXT) { - generate_error(dpy, GLXBadProfileARB, 0, X_GLXCreateContextAtrribsARB, False); + generate_error(dpy, GLXBadProfileARB, 0, X_GLXCreateContextAttribsARB, 
False); return NULL; } /* check renderType */ if (renderType != GLX_RGBA_TYPE && renderType != GLX_COLOR_INDEX_TYPE) { - generate_error(dpy, BadValue, 0, X_GLXCreateContextAtrribsARB, True); + generate_error(dpy, BadValue, 0, X_GLXCreateContextAttribsARB, True); return NULL; } @@ -2797,7 +2804,7 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config, (majorVersion == 3 && minorVersion > 3) || (majorVersion == 4 && minorVersion > 5) || majorVersion > 4))) { - generate_error(dpy, BadMatch, 0, X_GLXCreateContextAtrribsARB, True); + generate_error(dpy, BadMatch, 0, X_GLXCreateContextAttribsARB, True); return NULL; } if (profileMask == GLX_CONTEXT_ES_PROFILE_BIT_EXT && @@ -2809,18 +2816,18 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config, * different error code for invalid ES versions, but this is what NVIDIA * does and piglit expects. */ - generate_error(dpy, GLXBadProfileARB, 0, X_GLXCreateContextAtrribsARB, False); + generate_error(dpy, GLXBadProfileARB, 0, X_GLXCreateContextAttribsARB, False); return NULL; } if ((contextFlags & GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB) && majorVersion < 3) { - generate_error(dpy, BadMatch, 0, X_GLXCreateContextAtrribsARB, True); + generate_error(dpy, BadMatch, 0, X_GLXCreateContextAttribsARB, True); return NULL; } if (renderType == GLX_COLOR_INDEX_TYPE && majorVersion >= 3) { - generate_error(dpy, BadMatch, 0, X_GLXCreateContextAtrribsARB, True); + generate_error(dpy, BadMatch, 0, X_GLXCreateContextAttribsARB, True); return NULL; } @@ -2830,7 +2837,7 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config, majorVersion, minorVersion, profileMask, contextFlags); if (!ctx) { - generate_error(dpy, GLXBadFBConfig, 0, X_GLXCreateContextAtrribsARB, False); + generate_error(dpy, GLXBadFBConfig, 0, X_GLXCreateContextAttribsARB, False); } return ctx; From 98f2f47f7a1d893bb482d508a690c417c2453c6e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 25 May 2015 09:13:09 -0600 Subject: [PATCH 311/834] docs: reorganize 
devnotes.html file Move "Adding Extensions" to the end. Add a simple table of contents at the top. Reviewed-by: Thomas Helland --- docs/devinfo.html | 110 +++++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 49 deletions(-) diff --git a/docs/devinfo.html b/docs/devinfo.html index 8d20eea3c56..c7e4171a9cb 100644 --- a/docs/devinfo.html +++ b/docs/devinfo.html @@ -17,55 +17,15 @@

      Development Notes

      -

      Adding Extensions

      - -

      -To add a new GL extension to Mesa you have to do at least the following. -

        -
      • - If glext.h doesn't define the extension, edit include/GL/gl.h and add - code like this: -
        -     #ifndef GL_EXT_the_extension_name
        -     #define GL_EXT_the_extension_name 1
        -     /* declare the new enum tokens */
        -     /* prototype the new functions */
        -     /* TYPEDEFS for the new functions */
        -     #endif
        -   
        -
      • -
      • - In the src/mapi/glapi/gen/ directory, add the new extension functions and - enums to the gl_API.xml file. - Then, a bunch of source files must be regenerated by executing the - corresponding Python scripts. -
      • -
      • - Add a new entry to the gl_extensions struct in mtypes.h -
      • -
      • - Update the extensions.c file. -
      • -
      • - From this point, the best way to proceed is to find another extension, - similar to the new one, that's already implemented in Mesa and use it - as an example. -
      • -
      • - If the new extension adds new GL state, the functions in get.c, enable.c - and attrib.c will most likely require new code. -
      • -
      • - The dispatch tests check_table.cpp and dispatch_sanity.cpp - should be updated with details about the new extensions functions. These - tests are run using 'make check' -
      • +
      • Coding Style +
      • Submitting Patches +
      • Making a New Mesa Release +
      • Adding Extensions
      - -

      Coding Style

      +

      Coding Style

      Mesa's code style has changed over the years. Here's the latest. @@ -160,7 +120,8 @@ of bool, true, and src/mesa/state_tracker/st_glsl_to_tgsi.cpp can serve as examples.

      -

      Submitting patches

      + +

      Submitting patches

      You should always run the Mesa Testsuite before submitting patches. @@ -184,7 +145,7 @@ re-sending the whole series). Using --in-reply-to makes it harder for reviewers to accidentally review old patches.

      -

      Marking a commit as a candidate for a stable branch

      +

      Marking a commit as a candidate for a stable branch

      If you want a commit to be applied to a stable branch, @@ -221,7 +182,7 @@ the upcoming stable release can always be seen on the Mesa Stable Queue page. -

      Criteria for accepting patches to the stable branch

      +

      Criteria for accepting patches to the stable branch

      Mesa has a designated release manager for each stable branch, and the release manager is the only developer that should be pushing changes to these @@ -306,7 +267,8 @@ be rejected: regression that is unaacceptable for the stable branch.
    -

    Making a New Mesa Release

    + +

    Making a New Mesa Release

    These are the instructions for making a new Mesa release. @@ -543,6 +505,56 @@ release announcement:

    + +

    Adding Extensions

    + +

    +To add a new GL extension to Mesa you have to do at least the following. + +

      +
    • + If glext.h doesn't define the extension, edit include/GL/gl.h and add + code like this: +
      +     #ifndef GL_EXT_the_extension_name
      +     #define GL_EXT_the_extension_name 1
      +     /* declare the new enum tokens */
      +     /* prototype the new functions */
      +     /* TYPEDEFS for the new functions */
      +     #endif
      +   
      +
    • +
    • + In the src/mapi/glapi/gen/ directory, add the new extension functions and + enums to the gl_API.xml file. + Then, a bunch of source files must be regenerated by executing the + corresponding Python scripts. +
    • +
    • + Add a new entry to the gl_extensions struct in mtypes.h +
    • +
    • + Update the extensions.c file. +
    • +
    • + From this point, the best way to proceed is to find another extension, + similar to the new one, that's already implemented in Mesa and use it + as an example. +
    • +
    • + If the new extension adds new GL state, the functions in get.c, enable.c + and attrib.c will most likely require new code. +
    • +
    • + The dispatch tests check_table.cpp and dispatch_sanity.cpp + should be updated with details about the new extensions functions. These + tests are run using 'make check' +
    • +
    + + + + From d959885b9109878acc427b9321e46d8c6d133d1a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 25 May 2015 09:42:04 -0600 Subject: [PATCH 312/834] docs: update documentation about patch formatting, testing, etc v2: correctly escape < and > chars. Reviewed-by: Thomas Helland --- docs/devinfo.html | 104 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 2 deletions(-) diff --git a/docs/devinfo.html b/docs/devinfo.html index c7e4171a9cb..e068d87da13 100644 --- a/docs/devinfo.html +++ b/docs/devinfo.html @@ -124,12 +124,112 @@ src/mesa/state_tracker/st_glsl_to_tgsi.cpp can serve as examples.

    Submitting patches

    -You should always run the Mesa Testsuite before submitting patches. -The Testsuite can be run using the 'make check' command. All tests +The basic guidelines for submitting patches are: +

    + +
      +
    • Patches should be sufficiently tested before submitting. +
    • Code patches should follow Mesa coding conventions. +
    • Whenever possible, patches should only affect individual Mesa/Gallium +components. +
    • Patches should never introduce build breaks and should be bisectable (see +git bisect.) +
    • Patches should be properly formatted (see below). +
    • Patches should be submitted to mesa-dev for review using +git send-email. +
    • Patches should not mix code changes with code formatting changes (except, +perhaps, in very trivial cases.) +
    + +

    Patch formatting

    + +

    +The basic rules for patch formatting are: +

    + +
      +
    • Lines should be limited to 75 characters or less so that git logs +displayed in 80-column terminals avoid line wrapping. Note that git +log uses 4 spaces of indentation (4 + 75 < 80). +
    • The first line should be a short, concise summary of the change prefixed +with a module name. Examples: +
      +    mesa: Add support for querying GL_VERTEX_ATTRIB_ARRAY_LONG
      +
      +    gallium: add PIPE_CAP_DEVICE_RESET_STATUS_QUERY
      +
      +    i965: Fix missing type in local variable declaration.
      +
      +
    • Subsequent patch comments should describe the change in more detail, +if needed. For example: +
      +    i965: Remove end-of-thread SEND alignment code.
      +    
      +    This was present in Eric's initial implementation of the compaction code
      +    for Sandybridge (commit 077d01b6). There is no documentation saying this
      +    is necessary, and removing it causes no regressions in piglit on any
      +    platform.
      +
      +
    • A "Signed-off-by:" line is not required, but not discouraged either. +
    • If a patch addresses a bugzilla issue, that should be noted in the +patch comment. For example: +
      +   Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89689
      +
      +
    • If there have been several revisions to a patch during the review +process, they should be noted such as in this example: +
      +    st/mesa: add ARB_texture_stencil8 support (v4)
      +    
      +    if we support stencil texturing, enable texture_stencil8
      +    there is no requirement to support native S8 for this,
      +    the texture can be converted to x24s8 fine.
      +    
      +    v2: fold fixes from Marek in:
      +       a) put S8 last in the list
      +       b) fix renderable to always test for d/s renderable
      +        fixup the texture case to use a stencil only format
      +        for picking the format for the texture view.
      +    v3: hit fallback for getteximage
      +    v4: put s8 back in front, it shouldn't get picked now (Ilia)
      +
      +
    • If someone tested your patch, document it with a line like this: +
      +    Tested-by: Joe Hacker <jhacker@foo.com>
      +
      +
    • If the patch was reviewed (usually the case) or acked by someone, +that should be documented with: +
      +    Reviewed-by: Joe Hacker <jhacker@foo.com>
      +    Acked-by: Joe Hacker <jhacker@foo.com>
      +
      +
    + + + +

    Testing Patches

    + +

    +It should go without saying that patches must be tested. In general, +do whatever testing is prudent. +

    + +

    +You should always run the Mesa test suite before submitting patches. +The test suite can be run using the 'make check' command. All tests must pass before patches will be accepted, this may mean you have to update the tests themselves.

    +

    +Whenever possible and applicable, test the patch with +Piglit to +check for regressions. +

    + + +

    Mailing Patches

    +

    Patches should be sent to the Mesa mailing list for review. When submitting a patch make sure to use git send-email rather than attaching From c6184f84b7227e1548947e42bca3ff3ddb7e379c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 25 May 2015 10:18:35 -0600 Subject: [PATCH 313/834] docs: update the coding style information This hasn't been updated in a long time and from recent discussion on the mailing list, it's not always clear what's expected. Hopefully, this will help a bit. v2: document function brace placement, per Thomas Helland. Reviewed-by: Thomas Helland --- docs/devinfo.html | 165 ++++++++++++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 72 deletions(-) diff --git a/docs/devinfo.html b/docs/devinfo.html index e068d87da13..f5113b0bd72 100644 --- a/docs/devinfo.html +++ b/docs/devinfo.html @@ -28,97 +28,118 @@

    Coding Style

    -Mesa's code style has changed over the years. Here's the latest. +Mesa is over 20 years old and the coding style has evolved over time. +Some old parts use a style that's a bit out of date. +If the guidelines below don't cover something, try following the format of +existing, neighboring code.

    -Comment your code! It's extremely important that open-source code be -well documented. Also, strive to write clean, easily understandable code. +Basic formatting guidelines

    -

    -3-space indentation -

    - -

    -If you use tabs, set them to 8 columns -

    - -

    -Line width: the preferred width to fill comments and code in Mesa is 78 -columns. Exceptions are sometimes made for clarity (e.g. tabular data is -sometimes filled to a much larger width so that extraneous carriage returns -don't obscure the table). -

    - -

    -Brace example: -

    +
      +
    • 3-space indentation, no tabs. +
    • Limit lines to 78 or fewer characters. The idea is to prevent line +wrapping in 80-column editors and terminals. There are exceptions, such +as if you're defining a large, static table of information. +
    • Opening braces go on the same line as the if/for/while statement. +For example:
      -	if (condition) {
      -	   foo;
      -	}
      -	else {
      -	   bar;
      -	}
      -
      -	switch (condition) {
      -	case 0:
      -	   foo();
      -	   break;
      -
      -	case 1: {
      -	   ...
      -	   break;
      -	}
      -
      -	default:
      -	   ...
      -	   break;
      -	}
      +   if (condition) {
      +      foo;
      +   } else {
      +      bar;
      +   }
       
      -

      -Here's the GNU indent command which will best approximate my preferred style: -(Note that it won't format switch statements in the preferred way) -

      +
    • Put a space before/after operators. For example, a = b + c; +and not a=b+c; + +
    • This GNU indent command generally does the right thing for formatting:
      -	indent -br -i3 -npcs --no-tabs infile.c -o outfile.c
      +   indent -br -i3 -npcs --no-tabs infile.c -o outfile.c
       
      - -

      -Local variable name example: localVarName (no underscores) -

      - -

      -Constants and macros are ALL_UPPERCASE, with _ between words -

      - -

      -Global variables are not allowed. -

      - -

      -Function name examples: -

      +
    • Use comments wherever you think it would be helpful for other developers. +Several specific cases and style examples follow. Note that we roughly +follow Doxygen conventions. +
      +
      +Single-line comments:
      -	glFooBar()       - a public GL entry point (in glapi_dispatch.c)
      -	_mesa_FooBar()   - the internal immediate mode function
      -	save_FooBar()    - retained mode (display list) function in dlist.c
      -	foo_bar()        - a static (private) function
      -	_mesa_foo_bar()  - an internal non-static Mesa function
      +   /* null-out pointer to prevent dangling reference below */
      +   bufferObj = NULL;
      +
      +Or, +
      +   bufferObj = NULL;  /* prevent dangling reference below */
      +
      +Multi-line comment: +
      +   /* If this is a new buffer object id, or one which was generated but
      +    * never used before, allocate a buffer object now.
      +    */
      +
      +We try to quote the OpenGL specification where prudent: +
      +   /* Page 38 of the PDF of the OpenGL ES 3.0 spec says:
      +    *
      +    *     "An INVALID_OPERATION error is generated for any of the following
      +    *     conditions:
      +    *
      +    *     * <id> is zero."
      +    *
      +    * Additionally, page 94 of the PDF of the OpenGL 4.5 core spec
      +    * (30.10.2014) also says this, so it's no longer allowed for desktop GL,
      +    * either.
      +    */
      +
      +Function comment example: +
      +   /**
      +    * Create and initialize a new buffer object.  Called via the
      +    * ctx->Driver.CreateObject() driver callback function.
      +    * \param  name  integer name of the object
      +    * \param  type  one of GL_FOO, GL_BAR, etc.
      +    * \return  pointer to new object or NULL if error
      +    */
      +   struct gl_object *
      +   _mesa_create_object(GLuint name, GLenum type)
      +   {
      +      /* function body */
      +   }
       
      -

      -Places that are not directly visible to the GL API should prefer the use -of bool, true, and +

    • Put the function return type and qualifiers on one line and the function +name and parameters on the next, as seen above. This makes it easy to use +grep ^function_name dir/* to find function definitions. Also, +the opening brace goes on the next line by itself (see above.) + +
    • Function names follow various conventions depending on the type of function: +
      +   glFooBar()       - a public GL entry point (in glapi_dispatch.c)
      +   _mesa_FooBar()   - the internal immediate mode function
      +   save_FooBar()    - retained mode (display list) function in dlist.c
      +   foo_bar()        - a static (private) function
      +   _mesa_foo_bar()  - an internal non-static Mesa function
      +
      + +
    • Constants, macros and enumerant names are ALL_UPPERCASE, with _ between +words. +
    • Mesa usually uses camel case for local variables (Ex: "localVarname") +while gallium typically uses underscores (Ex: "local_var_name"). +
    • Global variables are almost never used because Mesa should be thread-safe. + +
    • Booleans. Places that are not directly visible to the GL API +should prefer the use of bool, true, and false over GLboolean, GL_TRUE, and GL_FALSE. In C code, this may mean that #include <stdbool.h> needs to be added. The try_emit_* methods in src/mesa/program/ir_to_mesa.cpp and src/mesa/state_tracker/st_glsl_to_tgsi.cpp can serve as examples. -

      + +

    Submitting patches

    From 2ab0ca36c155cc77e3d5c950270c70a24efee3d3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 26 May 2015 11:30:22 -0600 Subject: [PATCH 314/834] docs: add information about reviewing patches Reviewed-by: Matt Turner --- docs/devinfo.html | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/docs/devinfo.html b/docs/devinfo.html index f5113b0bd72..eb3aba1364a 100644 --- a/docs/devinfo.html +++ b/docs/devinfo.html @@ -266,6 +266,31 @@ re-sending the whole series). Using --in-reply-to makes it harder for reviewers to accidentally review old patches.

    +

    Reviewing Patches

    + +

    +When you've reviewed a patch on the mailing list, please be unambiguous +about your review. That is, state either +

    +    Reviewed-by: Joe Hacker <jhacker@foo.com>
    +
    +or +
    +    Acked-by: Joe Hacker <jhacker@foo.com>
    +
    +rather than just saying "LGTM" or "Seems OK". +

    + +

    +If small changes are suggested, it's OK to say something like: +

    +   With the above fixes, Reviewed-by: Joe Hacker <jhacker@foo.com>
    +
    +which tells the patch author that the patch can be committed, as long +as the issues are resolved first. +

    + +

    Marking a commit as a candidate for a stable branch

    From be71bbfaa2ad201b570b56847a13328fc359d0ee Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 May 2015 09:08:14 -0600 Subject: [PATCH 315/834] mesa: do not use _glapi_new_nop_table() for DRI builds Commit 4bdbb588a9d38 introduced new _glapi_new_nop_table() and _glapi_set_nop_handler() functions in the glapi dispatcher (which live in libGL.so). The calls to those functions from context.c would be undefined (i.e. an ABI break) if the libGL used at runtime was older. For the time being, use the old single generic_nop() function for non-Windows builds to avoid this problem. At some point in the future it should be safe to remove this work-around. See comments for more details. v2: Incorporate feedback from Emil. Use _WIN32 instead of GLX_DIRECT_RENDERING to control behavior, move comments. Cc: 10.6 Reviewed-and-tested-by: Ian Romanick --- src/mesa/main/context.c | 62 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 544cc142fde..02875ba2a06 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -883,6 +883,19 @@ update_default_objects(struct gl_context *ctx) } +/* XXX this is temporary and should be removed at some point in the + * future when there's a reasonable expectation that the libGL library + * contains the _glapi_new_nop_table() and _glapi_set_nop_handler() + * functions which were added in Mesa 10.6. + */ +#if !defined(_WIN32) +/* Avoid libGL / driver ABI break */ +#define USE_GLAPI_NOP_FEATURES 0 +#else +#define USE_GLAPI_NOP_FEATURES 1 +#endif + + /** * This function is called by the glapi no-op functions. For each OpenGL * function/entrypoint there's a simple no-op function. 
These "no-op" @@ -898,6 +911,7 @@ update_default_objects(struct gl_context *ctx) * * \param name the name of the OpenGL function */ +#if USE_GLAPI_NOP_FEATURES static void nop_handler(const char *name) { @@ -914,6 +928,7 @@ nop_handler(const char *name) } #endif } +#endif /** @@ -923,7 +938,45 @@ nop_handler(const char *name) static void GLAPIENTRY nop_glFlush(void) { - /* don't record an error like we do in _mesa_generic_nop() */ + /* don't record an error like we do in nop_handler() */ +} +#endif + + +#if !USE_GLAPI_NOP_FEATURES +static int +generic_nop(void) +{ + GET_CURRENT_CONTEXT(ctx); + _mesa_error(ctx, GL_INVALID_OPERATION, + "unsupported function called " + "(unsupported extension or deprecated function?)"); + return 0; +} +#endif + + +/** + * Create a new API dispatch table in which all entries point to the + * generic_nop() function. This will not work on Windows because of + * the __stdcall convention which requires the callee to clean up the + * call stack. That's impossible with one generic no-op function. + */ +#if !USE_GLAPI_NOP_FEATURES +static struct _glapi_table * +new_nop_table(unsigned numEntries) +{ + struct _glapi_table *table; + + table = malloc(numEntries * sizeof(_glapi_proc)); + if (table) { + _glapi_proc *entry = (_glapi_proc *) table; + unsigned i; + for (i = 0; i < numEntries; i++) { + entry[i] = (_glapi_proc) generic_nop; + } + } + return table; } #endif @@ -941,7 +994,11 @@ alloc_dispatch_table(void) * Mesa we do this to accommodate different versions of libGL and various * DRI drivers. 
*/ - GLint numEntries = MAX2(_glapi_get_dispatch_table_size(), _gloffset_COUNT); + int numEntries = MAX2(_glapi_get_dispatch_table_size(), _gloffset_COUNT); + +#if !USE_GLAPI_NOP_FEATURES + struct _glapi_table *table = new_nop_table(numEntries); +#else struct _glapi_table *table = _glapi_new_nop_table(numEntries); #if defined(_WIN32) @@ -967,6 +1024,7 @@ alloc_dispatch_table(void) #endif _glapi_set_nop_handler(nop_handler); +#endif return table; } From 8d813d14e1f5c690c6737c6cd6fc01937a7d4246 Mon Sep 17 00:00:00 2001 From: Thomas Helland Date: Tue, 26 May 2015 12:14:00 -0600 Subject: [PATCH 316/834] docs: Fix some typos in the developer notes Found when double-checking my review on Brian's series. Signed-off-by: Thomas Helland Reviewed-by: Brian Paul --- docs/devinfo.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/devinfo.html b/docs/devinfo.html index eb3aba1364a..0da18b9b709 100644 --- a/docs/devinfo.html +++ b/docs/devinfo.html @@ -564,7 +564,7 @@ Edit docs/relnotes/X.Y.Z.html to add the sha256sums printed as part of "make tarballs" in the previous step. Commit this change.

    -

    Push all commits and the tag creates above

    +

    Push all commits and the tag created above

    This is the first step that cannot easily be undone. The release is going @@ -591,7 +591,7 @@ signatures to the freedesktop.org server: mv ~/MesaLib-X.Y.Z* . -

    Back on mesa master, andd the new release notes into the tree

    +

    Back on mesa master, add the new release notes into the tree

    Something like the following steps will do the trick: From 2b8c51834bcc34a70dec9b470a28c0ef972d6993 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 26 May 2015 11:41:44 -0700 Subject: [PATCH 317/834] glapi: Encapsulate nop table knowledge in new _mesa_new_nop_table function Encapsulate the knowledge about how to build the nop table in a new _mesa_new_nop_table function. This makes it easier for dispatch_sanity to keep working now and in the future. Signed-off-by: Ian Romanick Reviewed-by: Brian Paul Reviewed-by: Emil Velikov Tested-by: Mark Janes Cc: 10.6 --- src/mesa/main/api_exec.h | 3 +++ src/mesa/main/context.c | 16 ++++++++-------- src/mesa/main/tests/dispatch_sanity.cpp | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/mesa/main/api_exec.h b/src/mesa/main/api_exec.h index 12249fec228..655cb32d0a4 100644 --- a/src/mesa/main/api_exec.h +++ b/src/mesa/main/api_exec.h @@ -38,6 +38,9 @@ _mesa_initialize_exec_table(struct gl_context *ctx); extern void _mesa_initialize_dispatch_tables(struct gl_context *ctx); +extern struct _glapi_table * +_mesa_new_nop_table(unsigned numEntries); + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 02875ba2a06..e4faf3d462a 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -962,12 +962,12 @@ generic_nop(void) * the __stdcall convention which requires the callee to clean up the * call stack. That's impossible with one generic no-op function. 
*/ -#if !USE_GLAPI_NOP_FEATURES -static struct _glapi_table * -new_nop_table(unsigned numEntries) +struct _glapi_table * +_mesa_new_nop_table(unsigned numEntries) { struct _glapi_table *table; +#if !USE_GLAPI_NOP_FEATURES table = malloc(numEntries * sizeof(_glapi_proc)); if (table) { _glapi_proc *entry = (_glapi_proc *) table; @@ -976,9 +976,11 @@ new_nop_table(unsigned numEntries) entry[i] = (_glapi_proc) generic_nop; } } +#else + table = _glapi_new_nop_table(numEntries); +#endif return table; } -#endif /** @@ -996,10 +998,7 @@ alloc_dispatch_table(void) */ int numEntries = MAX2(_glapi_get_dispatch_table_size(), _gloffset_COUNT); -#if !USE_GLAPI_NOP_FEATURES - struct _glapi_table *table = new_nop_table(numEntries); -#else - struct _glapi_table *table = _glapi_new_nop_table(numEntries); + struct _glapi_table *table = _mesa_new_nop_table(numEntries); #if defined(_WIN32) if (table) { @@ -1023,6 +1022,7 @@ alloc_dispatch_table(void) } #endif +#if USE_GLAPI_NOP_FEATURES _glapi_set_nop_handler(nop_handler); #endif diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index d38b68d0c9a..c6f3c395733 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -96,7 +96,7 @@ DispatchSanity_test::SetUp() _mesa_init_driver_functions(&driver_functions); const unsigned size = _glapi_get_dispatch_table_size(); - nop_table = (_glapi_proc *) _glapi_new_nop_table(size); + nop_table = (_glapi_proc *) _mesa_new_nop_table(size); } void From 40665362fd660a8d58f9edbdfec79a33d44b1534 Mon Sep 17 00:00:00 2001 From: EdB Date: Mon, 11 May 2015 17:45:08 +0200 Subject: [PATCH 318/834] clover: Log build options when dumping clc source. 
Reviewed-by: Francisco Jerez --- src/gallium/state_trackers/clover/llvm/invocation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 7d2d941407e..9b91fee9032 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -709,7 +709,7 @@ clover::compile_program_llvm(const std::string &source, llvm_ctx.setDiagnosticHandler(diagnostic_handler, &r_log); if (get_debug_flags() & DBG_CLC) - debug_log(source, ".cl"); + debug_log("// Build options: " + opts + '\n' + source, ".cl"); // The input file name must have the .cl extension in order for the // CompilerInvocation class to recognize it as an OpenCL source file. From bb18df008e31dd93a364289d003697587d7d78fa Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 23 May 2015 14:27:40 -0700 Subject: [PATCH 319/834] i965: Delete GS scratch space workaround warning. This workaround is documented in the 3DSTATE_GS documentation. It appears to only apply to early steppings of Broadwell and Skylake. I don't think it ever affected production hardware, so at this point it probably makes sense to delete it. 
Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/gen8_gs_state.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 46b97131e20..6a0e215eca3 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -59,10 +59,6 @@ gen8_upload_gs_state(struct brw_context *brw) OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(brw->gs.prog_data->base.base.total_scratch) - 11); - WARN_ONCE(true, - "May need to implement a temporary workaround: GS Number of " - "URB Entries must be less than or equal to the GS Maximum " - "Number of Threads.\n"); } else { OUT_BATCH(0); OUT_BATCH(0); From 70c6f2323e602d115b21db8f2bf212223fdef921 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 21 May 2015 07:53:09 -0700 Subject: [PATCH 320/834] i965: Remove _NEW_MULTISAMPLE dirty bit from 3DSTATE_PS_EXTRA. BRW_NEW_NUM_SAMPLES is sufficient. 
Signed-off-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/gen8_ps_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 85ad3b6c551..6b9489bf7f6 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -72,7 +72,7 @@ upload_ps_extra(struct brw_context *brw) brw_fragment_program_const(brw->fragment_program); /* BRW_NEW_FS_PROG_DATA */ const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; - /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */ + /* BRW_NEW_NUM_SAMPLES */ const bool multisampled_fbo = brw->num_samples > 1; gen8_upload_ps_extra(brw, &fp->program, prog_data, multisampled_fbo); @@ -80,7 +80,7 @@ upload_ps_extra(struct brw_context *brw) const struct brw_tracked_state gen8_ps_extra = { .dirty = { - .mesa = _NEW_MULTISAMPLE, + .mesa = 0, .brw = BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | From 147ffd48166d851341cadd12de98895f32ec25a2 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 26 May 2015 22:18:28 -0700 Subject: [PATCH 321/834] gallivm: Do not use NoFramePointerElim with LLVM 3.7. TargetOptions::NoFramePointerElim was removed in llvm-3.7.0svn r238244 "Remove NoFramePointerElim and NoFramePointerElimOverride from TargetOptions and remove ExecutionEngine's dependence on CodeGen. NFC." 
Signed-off-by: Vinson Lee Reviewed-by: Tom Stellard --- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 2 ++ src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index be3e834b774..76c302f6531 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -277,7 +277,9 @@ disassemble(const void* func, llvm::raw_ostream & Out) options.StackAlignmentOverride = 4; #endif #if defined(DEBUG) || defined(PROFILE) +#if HAVE_LLVM < 0x0307 options.NoFramePointerElim = true; +#endif #endif OwningPtr TM(T->createTargetMachine(Triple, sys::getHostCPUName(), "", options)); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 5e8a634f019..ffed9e6b69a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -439,7 +439,9 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #if HAVE_LLVM < 0x0304 options.NoFramePointerElimNonLeaf = true; #endif +#if HAVE_LLVM < 0x0307 options.NoFramePointerElim = true; +#endif #endif builder.setEngineKind(EngineKind::JIT) From e2d84d99f5a66738e8f584bdfea66182f36fe46c Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 20 May 2015 19:20:14 -0700 Subject: [PATCH 322/834] i965: Emit 3DSTATE_MULTISAMPLE before WM_HZ_OP (gen8+) Starting with GEN8, there is documentation that the multisample state command must be emitted before the 3DSTATE_WM_HZ_OP command any time the multisample count changes. The 3DSTATE_WM_HZ_OP packet gets emitted as a result of intel_hiz_exec(), which is called upon a fast clear and/or a resolve. This can happen before the state atoms are checked, and so the multisample state must be put directly in the function.
v1: - In v0, I was always emitting the command, but Ken came up with the condition to determine whether or not the sample count actually changed. - Ken's recommendation was to set brw->num_multisamples after emitting 3DSTATE_MULTISAMPLE. This doesn't work. I put my best guess as to why in the XXX (it was causing 7 regressions on BDW). v2: Flag NEW_MULTISAMPLE state. As Ken found, in state upload we check for the multisample change to determine whether or not to emit certain packets. Since the hiz code doesn't actually care about the number of multisamples, set the flag and let the later code take care of it. Jenkins results: http://otc-mesa-ci.jf.intel.com/view/dev/job/bwidawsk/136/ Fixes around 200 piglit tests on SKL. I'm somewhat surprised that it seems to have no impact on BDW as the restriction is needed there as well. Cc: "10.5 10.6" Signed-off-by: Ben Widawsky Reviewed-by: Neil Roberts (v0) Reviewed-by: Kenneth Graunke (v2) --- src/mesa/drivers/dri/i965/gen8_depth_state.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c index b502650f991..12ac97a5d14 100644 --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c @@ -417,6 +417,16 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t surface_width = ALIGN(mt->logical_width0, level == 0 ? 8 : 1); uint32_t surface_height = ALIGN(mt->logical_height0, level == 0 ? 4 : 1); + /* From the documentation for 3DSTATE_WM_HZ_OP: "3DSTATE_MULTISAMPLE packet + * must be used prior to this packet to change the Number of Multisamples. + * This packet must not be used to change Number of Multisamples in a + * rendering sequence." 
+ */ + if (brw->num_samples != mt->num_samples) { + gen8_emit_3dstate_multisample(brw, mt->num_samples); + brw->NewGLState |= _NEW_MULTISAMPLE; + } + /* The basic algorithm is: * - If needed, emit 3DSTATE_{DEPTH,HIER_DEPTH,STENCIL}_BUFFER and * 3DSTATE_CLEAR_PARAMS packets to set up the relevant buffers. From 41630c0653578db0c237296aaeec0a85a4e7f4ad Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 22 May 2015 12:45:43 +0200 Subject: [PATCH 323/834] vc4: make vc4_begin_query() return a boolean I forgot to make the change in 96f164f6f047833091eb98a73aa80c31dc94f962. This fixes a warning with GCC and probably an error with Clang. Signed-off-by: Samuel Pitoiset Reviewed-by: Eric Anholt --- src/gallium/drivers/vc4/vc4_query.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_query.c b/src/gallium/drivers/vc4/vc4_query.c index 1792becb08f..270832eae3a 100644 --- a/src/gallium/drivers/vc4/vc4_query.c +++ b/src/gallium/drivers/vc4/vc4_query.c @@ -50,9 +50,10 @@ vc4_destroy_query(struct pipe_context *ctx, struct pipe_query *query) free(query); } -static void +static boolean vc4_begin_query(struct pipe_context *ctx, struct pipe_query *query) { + return true; } static void From b0edc19a521853371a63e9ffbc519424c8f82942 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 27 May 2015 16:01:00 -0700 Subject: [PATCH 324/834] vc4: Don't forget to make our raster shadow textures non-raster. Not sure what happened in my testing that made the previous shadow code fix glxgears swapbuffering, but this also fixes lots of CopyArea in X (like dragging xlogo around in metacity). 
--- src/gallium/drivers/vc4/vc4_state.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 80e963ea2ee..7875eff0325 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -538,6 +538,7 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, struct pipe_resource tmpl = shadow_parent->base.b; struct vc4_resource *clone; + tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; tmpl.width0 = u_minify(tmpl.width0, so->u.tex.first_level); tmpl.height0 = u_minify(tmpl.height0, so->u.tex.first_level); tmpl.last_level = so->u.tex.last_level - so->u.tex.first_level; @@ -547,6 +548,8 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, clone->shadow_parent = &shadow_parent->base.b; /* Flag it as needing update of the contents from the parent. */ clone->writes = shadow_parent->writes - 1; + + assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R); } so->texture = prsc; so->reference.count = 1; From f8de6277bfa1a7db9a8c0f0baaa441276264a982 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 27 May 2015 16:20:28 -0700 Subject: [PATCH 325/834] vc4: Don't try to put our dmabuf-exported BOs into the BO cache. We'd sometimes try to reallocate something that X was using as a new pipe_resource, and potentially conflict in our rendering. But even worse, if we reallocated the BO as a shader, the kernel would reject rendering using the shader. 
--- src/gallium/drivers/vc4/vc4_bufmgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index 4bb2c711e16..6b3a8c3070c 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -286,6 +286,7 @@ vc4_bo_get_dmabuf(struct vc4_bo *bo) bo->handle); return -1; } + bo->private = false; return fd; } From 10aacf5ae8f3e90e2f0967fbdcf96df93e346e20 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 14 Apr 2015 22:42:02 -0700 Subject: [PATCH 326/834] vc4: Just stream out fallback IB contents. The idea I had when I wrote the original shadow code was that you'd see a set_index_buffer to the IB, then a bunch of draws out of it. What's actually happening in openarena is that set_index_buffer occurs at every draw, so we end up making a new shadow BO every time, and converting more of the BO than is actually used in the draw. While I could maybe come up with a better caching scheme, for now just do the simple thing that doesn't result in a new shadow IB allocation per draw. Improves performance of isosurf in drawelements mode by 58.7967% +/- 3.86152% (n=8). 
--- src/gallium/drivers/vc4/vc4_context.c | 7 ++++++ src/gallium/drivers/vc4/vc4_context.h | 2 ++ src/gallium/drivers/vc4/vc4_draw.c | 15 ++++++++---- src/gallium/drivers/vc4/vc4_resource.c | 33 ++++++++++++-------------- src/gallium/drivers/vc4/vc4_resource.h | 6 +++-- src/gallium/drivers/vc4/vc4_state.c | 20 ++-------------- 6 files changed, 41 insertions(+), 42 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index b394c186efb..a2b1cac952d 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -29,6 +29,7 @@ #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_blitter.h" +#include "util/u_upload_mgr.h" #include "indices/u_primconvert.h" #include "pipe/p_screen.h" @@ -410,6 +411,9 @@ vc4_context_destroy(struct pipe_context *pctx) if (vc4->primconvert) util_primconvert_destroy(vc4->primconvert); + if (vc4->uploader) + u_upload_destroy(vc4->uploader); + util_slab_destroy(&vc4->transfer_pool); pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL); @@ -466,6 +470,9 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv) if (!vc4->primconvert) goto fail; + vc4->uploader = u_upload_create(pctx, 16 * 1024, 4, + PIPE_BIND_INDEX_BUFFER); + vc4_debug |= saved_shaderdb_flag; return &vc4->base; diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index d89f1974e12..41dacb9172d 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -243,6 +243,8 @@ struct vc4_context { /** Seqno of the last CL flush's job. 
*/ uint64_t last_emit_seqno; + struct u_upload_mgr *uploader; + /** @{ Current pipeline state objects */ struct pipe_scissor_state scissor; struct pipe_blend_state *blend; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 16418bf12da..15743ea7671 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -266,13 +266,17 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) * definitions, up to but not including QUADS. */ if (info->indexed) { - struct vc4_resource *rsc = vc4_resource(vc4->indexbuf.buffer); uint32_t offset = vc4->indexbuf.offset; uint32_t index_size = vc4->indexbuf.index_size; - if (rsc->shadow_parent) { - vc4_update_shadow_index_buffer(pctx, &vc4->indexbuf); - offset = 0; + struct pipe_resource *prsc; + if (vc4->indexbuf.index_size == 4) { + prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf, + info->count, &offset); + index_size = 2; + } else { + prsc = vc4->indexbuf.buffer; } + struct vc4_resource *rsc = vc4_resource(prsc); cl_start_reloc(&vc4->bcl, 1); cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE); @@ -284,6 +288,9 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_u32(&vc4->bcl, info->count); cl_reloc(vc4, &vc4->bcl, rsc->bo, offset); cl_u32(&vc4->bcl, max_index); + + if (vc4->indexbuf.index_size == 4) + pipe_resource_reference(&prsc, NULL); } else { cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE); cl_u8(&vc4->bcl, info->mode); diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 3f180d5845d..14b135e2f45 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -26,6 +26,7 @@ #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_surface.h" +#include "util/u_upload_mgr.h" #include "vc4_screen.h" #include "vc4_context.h" @@ -638,41 +639,37 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, * 
was in user memory, it would be nice to not have uploaded it to a VBO * before translating. */ -void -vc4_update_shadow_index_buffer(struct pipe_context *pctx, - const struct pipe_index_buffer *ib) +struct pipe_resource * +vc4_get_shadow_index_buffer(struct pipe_context *pctx, + const struct pipe_index_buffer *ib, + uint32_t count, + uint32_t *shadow_offset) { - struct vc4_resource *shadow = vc4_resource(ib->buffer); - struct vc4_resource *orig = vc4_resource(shadow->shadow_parent); - uint32_t count = shadow->base.b.width0 / 2; - - if (shadow->writes == orig->writes) - return; - + struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_resource *orig = vc4_resource(ib->buffer); perf_debug("Fallback conversion for %d uint indices\n", count); + void *data; + struct pipe_resource *shadow_rsc = NULL; + u_upload_alloc(vc4->uploader, 0, count * 2, + shadow_offset, &shadow_rsc, &data); + uint16_t *dst = data; + struct pipe_transfer *src_transfer; uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b, ib->offset, count * 4, PIPE_TRANSFER_READ, &src_transfer); - struct pipe_transfer *dst_transfer; - uint16_t *dst = pipe_buffer_map_range(pctx, &shadow->base.b, - 0, - count * 2, - PIPE_TRANSFER_WRITE, &dst_transfer); - for (int i = 0; i < count; i++) { uint32_t src_index = src[i]; assert(src_index <= 0xffff); dst[i] = src_index; } - pctx->transfer_unmap(pctx, dst_transfer); pctx->transfer_unmap(pctx, src_transfer); - shadow->writes = orig->writes; + return shadow_rsc; } void diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h index 2ed848bc7b9..b3cba8f2f65 100644 --- a/src/gallium/drivers/vc4/vc4_resource.h +++ b/src/gallium/drivers/vc4/vc4_resource.h @@ -107,8 +107,10 @@ struct pipe_resource *vc4_resource_create(struct pipe_screen *pscreen, const struct pipe_resource *tmpl); void vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, struct pipe_sampler_view *view); -void vc4_update_shadow_index_buffer(struct 
pipe_context *pctx, - const struct pipe_index_buffer *ib); +struct pipe_resource *vc4_get_shadow_index_buffer(struct pipe_context *pctx, + const struct pipe_index_buffer *ib, + uint32_t count, + uint32_t *offset); void vc4_dump_surface(struct pipe_surface *psurf); #endif /* VC4_RESOURCE_H */ diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 7875eff0325..4a1d4c3a4d6 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -304,24 +304,8 @@ vc4_set_index_buffer(struct pipe_context *pctx, if (ib) { assert(!ib->user_buffer); - - if (ib->index_size == 4) { - struct pipe_resource tmpl = *ib->buffer; - assert(tmpl.format == PIPE_FORMAT_R8_UNORM); - assert(tmpl.height0 == 1); - tmpl.width0 = (tmpl.width0 - ib->offset) / 2; - struct pipe_resource *pshadow = - vc4_resource_create(&vc4->screen->base, &tmpl); - struct vc4_resource *shadow = vc4_resource(pshadow); - pipe_resource_reference(&shadow->shadow_parent, ib->buffer); - - pipe_resource_reference(&vc4->indexbuf.buffer, NULL); - vc4->indexbuf.buffer = pshadow; - vc4->indexbuf.index_size = 2; - } else { - pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer); - vc4->indexbuf.index_size = ib->index_size; - } + pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer); + vc4->indexbuf.index_size = ib->index_size; vc4->indexbuf.offset = ib->offset; } else { pipe_resource_reference(&vc4->indexbuf.buffer, NULL); From 09d6243aed016eed4518435c9885275dbb6d2aa9 Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Thu, 28 May 2015 10:11:36 +0100 Subject: [PATCH 327/834] gallivm: Workaround LLVM PR23628. Temporarily undefine DEBUG macro while including LLVM C++ headers, leveraging the push/pop_macro pragmas, which are supported both by GCC and MSVC. https://bugs.freedesktop.org/show_bug.cgi?id=90621 Trivial. 
--- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index ffed9e6b69a..5e25819ac55 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -50,6 +50,12 @@ #include +// Workaround http://llvm.org/PR23628 +#if HAVE_LLVM >= 0x0307 +# pragma push_macro("DEBUG") +# undef DEBUG +#endif + #include #include #include @@ -70,6 +76,11 @@ #include #include +// Workaround http://llvm.org/PR23628 +#if HAVE_LLVM >= 0x0307 +# pragma pop_macro("DEBUG") +#endif + #include "pipe/p_config.h" #include "util/u_debug.h" #include "util/u_cpu_detect.h" From 2231cf0ba3a79d9abb08065e0f72811c5eea807f Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 28 May 2015 09:06:33 +0200 Subject: [PATCH 328/834] nir: Fix output swizzle in get_mul_for_src When we compute the output swizzle we want to consider the number of components in the add operation. So far we were using the writemask of the multiplication for this instead, which is not correct. 
Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_opt_peephole_ffma.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c index b430eac8eab..798506b7595 100644 --- a/src/glsl/nir/nir_opt_peephole_ffma.c +++ b/src/glsl/nir/nir_opt_peephole_ffma.c @@ -73,7 +73,8 @@ are_all_uses_fadd(nir_ssa_def *def) } static nir_alu_instr * -get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) +get_mul_for_src(nir_alu_src *src, int num_components, + uint8_t swizzle[4], bool *negate, bool *abs) { assert(src->src.is_ssa && !src->abs && !src->negate); @@ -85,16 +86,16 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) switch (alu->op) { case nir_op_imov: case nir_op_fmov: - alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); break; case nir_op_fneg: - alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); *negate = !*negate; break; case nir_op_fabs: - alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); *negate = false; *abs = true; break; @@ -115,12 +116,8 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) if (!alu) return NULL; - for (unsigned i = 0; i < 4; i++) { - if (!(alu->dest.write_mask & (1 << i))) - break; - + for (unsigned i = 0; i < num_components; i++) swizzle[i] = swizzle[src->swizzle[i]]; - } return alu; } @@ -160,7 +157,9 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state) negate = false; abs = false; - mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs); + mul = get_mul_for_src(&add->src[add_mul_src], + add->dest.dest.ssa.num_components, + swizzle, &negate, &abs); if (mul != NULL) break; From 
0596134410a0decc2f6bba77bfedb82d308aabbe Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 27 May 2015 10:44:45 -0700 Subject: [PATCH 329/834] i965/fs: Fix lowering of integer multiplication with cmod. If the multiplication's result is unused, except by a conditional_mod, the destination will be null. Since the final instruction in the lowered sequence is a partial-write, we can't put the conditional mod on it and we have to store the full result to a register and do a MOV with a conditional mod. Cc: "10.6" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90580 Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 42a0d78e542..5cc4fe66e99 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3606,6 +3606,10 @@ fs_visitor::lower_integer_multiplication() * schedule multi-component multiplications much better. */ + if (inst->conditional_mod && inst->dst.is_null()) { + inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8), + inst->dst.type, dispatch_width); + } fs_reg low = inst->dst; fs_reg high(GRF, alloc.allocate(dispatch_width / 8), inst->dst.type, dispatch_width); @@ -3655,6 +3659,13 @@ fs_visitor::lower_integer_multiplication() low.stride = 2; insert(ADD(dst, low, high)); + + if (inst->conditional_mod) { + fs_reg null(retype(brw_null_reg(), inst->dst.type)); + fs_inst *mov = MOV(null, inst->dst); + mov->conditional_mod = inst->conditional_mod; + insert(mov); + } } #undef insert From e354cc9b791cf025d26de7e19c58d499b83a3570 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 27 May 2015 12:19:07 -0700 Subject: [PATCH 330/834] i965: Silence warning in 3-src type-setting. 
Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index e78d0bec268..a1d11f30433 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -914,6 +914,8 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_UD); brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_UD); break; + default: + unreachable("not reached"); } } From 8bbe7fa7a853d8ebf69e5d2d0fdc4343a20b638f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 May 2015 09:31:55 -0700 Subject: [PATCH 331/834] i965/fs: Properly handle explicit depth in SIMD16 with dual-source blend Cc: "10.6" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90629 Tested-by: Markus Wick Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index ead77686640..314136c7819 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -3711,7 +3711,11 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { /* Hand over gl_FragDepth. */ assert(this->frag_depth.file != BAD_FILE); - sources[length] = this->frag_depth; + if (exec_size < dispatch_width) { + sources[length] = half(this->frag_depth, use_2nd_half); + } else { + sources[length] = this->frag_depth; + } } else { /* Pass through the payload depth. 
*/ sources[length] = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)); From 366ceacf72258a4a81d9c6b412dd565a4c611b17 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 28 Apr 2015 18:00:43 -0700 Subject: [PATCH 332/834] gles/es3.1: Enable dispatch of almost all new GLES 3.1 functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A couple functions are missing because there are no implementations of them yet. These are: glFramebufferParameteri (from GL_ARB_framebuffer_no_attachments) glGetFramebufferParameteriv (from GL_ARB_framebuffer_no_attachments) glMemoryBarrierByRegion v2: Rebase on updated dispatch_sanity.cpp test. v3: Add support for glDraw{Arrays,Elements}Indirect in vbo_exec_array.c. The updated dispatch_sanity.cpp test discovered this omission. v4: Rebase on glapi changes. Signed-off-by: Ian Romanick Reviewed-by: Tapani Pälli --- src/mapi/glapi/gen/ARB_compute_shader.xml | 4 +- src/mapi/glapi/gen/ARB_draw_indirect.xml | 4 +- .../glapi/gen/ARB_program_interface_query.xml | 10 +- .../glapi/gen/ARB_separate_shader_objects.xml | 86 ++++++++--------- .../glapi/gen/ARB_shader_image_load_store.xml | 4 +- .../glapi/gen/ARB_texture_multisample.xml | 4 +- .../gen/ARB_texture_storage_multisample.xml | 2 +- .../glapi/gen/ARB_vertex_attrib_binding.xml | 10 +- src/mapi/glapi/gen/GL3x.xml | 2 +- src/mapi/glapi/gen/gl_API.xml | 4 +- src/mesa/main/tests/dispatch_sanity.cpp | 95 +++++++++++++++++++ src/mesa/vbo/vbo_exec_array.c | 5 +- 12 files changed, 164 insertions(+), 66 deletions(-) diff --git a/src/mapi/glapi/gen/ARB_compute_shader.xml b/src/mapi/glapi/gen/ARB_compute_shader.xml index 78d352f1f37..c2ec842efe1 100644 --- a/src/mapi/glapi/gen/ARB_compute_shader.xml +++ b/src/mapi/glapi/gen/ARB_compute_shader.xml @@ -26,13 +26,13 @@ - + - + diff --git a/src/mapi/glapi/gen/ARB_draw_indirect.xml b/src/mapi/glapi/gen/ARB_draw_indirect.xml index 2001eb00b59..3b29d6b8674 100644 --- a/src/mapi/glapi/gen/ARB_draw_indirect.xml +++ 
b/src/mapi/glapi/gen/ARB_draw_indirect.xml @@ -8,12 +8,12 @@ - + - + diff --git a/src/mapi/glapi/gen/ARB_program_interface_query.xml b/src/mapi/glapi/gen/ARB_program_interface_query.xml index 5b6d5cc59bd..c3162f5ed16 100644 --- a/src/mapi/glapi/gen/ARB_program_interface_query.xml +++ b/src/mapi/glapi/gen/ARB_program_interface_query.xml @@ -56,21 +56,21 @@ - + - + - + @@ -79,7 +79,7 @@ - + @@ -90,7 +90,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_separate_shader_objects.xml b/src/mapi/glapi/gen/ARB_separate_shader_objects.xml index c20ee4b50ff..c9f481d8b6b 100644 --- a/src/mapi/glapi/gen/ARB_separate_shader_objects.xml +++ b/src/mapi/glapi/gen/ARB_separate_shader_objects.xml @@ -15,33 +15,33 @@ - + - + - + - + - + - + - + @@ -54,30 +54,30 @@ --> - + - + - + - + - + @@ -85,25 +85,25 @@ - + - + - + - + @@ -111,25 +111,25 @@ - + - + - + - + @@ -137,145 +137,145 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/src/mapi/glapi/gen/ARB_shader_image_load_store.xml b/src/mapi/glapi/gen/ARB_shader_image_load_store.xml index c6a97bf1878..178e930f1d5 100644 --- a/src/mapi/glapi/gen/ARB_shader_image_load_store.xml +++ b/src/mapi/glapi/gen/ARB_shader_image_load_store.xml @@ -70,7 +70,7 @@ - + @@ -80,7 +80,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_texture_multisample.xml b/src/mapi/glapi/gen/ARB_texture_multisample.xml index d7cf2a30aa2..595e1c7eae6 100644 --- a/src/mapi/glapi/gen/ARB_texture_multisample.xml +++ b/src/mapi/glapi/gen/ARB_texture_multisample.xml @@ -53,13 +53,13 @@ - + - + diff --git a/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml b/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml index 0d39fa235da..6ed8f1a01d8 100644 --- a/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml +++ b/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml @@ -7,7 +7,7 @@ - + diff --git a/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml b/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml index 
0f000639f1f..ba9ca57bb54 100644 --- a/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml +++ b/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml @@ -7,14 +7,14 @@ - + - + @@ -22,7 +22,7 @@ - + @@ -36,12 +36,12 @@ - + - + diff --git a/src/mapi/glapi/gen/GL3x.xml b/src/mapi/glapi/gen/GL3x.xml index 348d5221fb5..7919d657774 100644 --- a/src/mapi/glapi/gen/GL3x.xml +++ b/src/mapi/glapi/gen/GL3x.xml @@ -166,7 +166,7 @@ - + diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index 3090b9f7e02..bd8db62033e 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -2824,7 +2824,7 @@ - + @@ -2832,7 +2832,7 @@ - + diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index c6f3c395733..ab66f884673 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -72,6 +72,7 @@ extern const struct function gl_core_functions_possible[]; extern const struct function gles11_functions_possible[]; extern const struct function gles2_functions_possible[]; extern const struct function gles3_functions_possible[]; +extern const struct function gles31_functions_possible[]; class DispatchSanity_test : public ::testing::Test { public: @@ -201,6 +202,15 @@ TEST_F(DispatchSanity_test, GLES3) validate_nops(&ctx, nop_table); } +TEST_F(DispatchSanity_test, GLES31) +{ + SetUpCtx(API_OPENGLES2, 31); + validate_functions(&ctx, gles2_functions_possible, nop_table); + validate_functions(&ctx, gles3_functions_possible, nop_table); + validate_functions(&ctx, gles31_functions_possible, nop_table); + validate_nops(&ctx, nop_table); +} + const struct function gl_core_functions_possible[] = { { "glCullFace", 10, -1 }, { "glFrontFace", 10, -1 }, @@ -1611,3 +1621,88 @@ const struct function gles3_functions_possible[] = { { NULL, 0, -1 } }; + +const struct function gles31_functions_possible[] = { + { "glDispatchCompute", 31, -1 }, + { "glDispatchComputeIndirect", 31, -1 }, + { 
"glDrawArraysIndirect", 31, -1 }, + { "glDrawElementsIndirect", 31, -1 }, + + // FINISHME: These two functions have not been implemented yet. They come + // FINISHME: from the ARB_framebuffer_no_attachments extension. + // { "glFramebufferParameteri", 31, -1 }, + // { "glGetFramebufferParameteriv", 31, -1 }, + + { "glGetProgramInterfaceiv", 31, -1 }, + { "glGetProgramResourceIndex", 31, -1 }, + { "glGetProgramResourceName", 31, -1 }, + { "glGetProgramResourceiv", 31, -1 }, + { "glGetProgramResourceLocation", 31, -1 }, + + // We check for the aliased EXT versions in GLES 2 + // { "glUseProgramStages", 31, -1 }, + // { "glActiveShaderProgram", 31, -1 }, + // { "glCreateShaderProgramv", 31, -1 }, + // { "glBindProgramPipeline", 31, -1 }, + // { "glDeleteProgramPipelines", 31, -1 }, + // { "glGenProgramPipelines", 31, -1 }, + // { "glIsProgramPipeline", 31, -1 }, + // { "glGetProgramPipelineiv", 31, -1 }, + // { "glProgramUniform1i", 31, -1 }, + // { "glProgramUniform2i", 31, -1 }, + // { "glProgramUniform3i", 31, -1 }, + // { "glProgramUniform4i", 31, -1 }, + // { "glProgramUniform1f", 31, -1 }, + // { "glProgramUniform2f", 31, -1 }, + // { "glProgramUniform3f", 31, -1 }, + // { "glProgramUniform4f", 31, -1 }, + // { "glProgramUniform1iv", 31, -1 }, + // { "glProgramUniform2iv", 31, -1 }, + // { "glProgramUniform3iv", 31, -1 }, + // { "glProgramUniform4iv", 31, -1 }, + // { "glProgramUniform1fv", 31, -1 }, + // { "glProgramUniform2fv", 31, -1 }, + // { "glProgramUniform3fv", 31, -1 }, + // { "glProgramUniform4fv", 31, -1 }, + // { "glProgramUniformMatrix2fv", 31, -1 }, + // { "glProgramUniformMatrix3fv", 31, -1 }, + // { "glProgramUniformMatrix4fv", 31, -1 }, + // { "glProgramUniformMatrix2x3fv", 31, -1 }, + // { "glProgramUniformMatrix3x2fv", 31, -1 }, + // { "glProgramUniformMatrix2x4fv", 31, -1 }, + // { "glProgramUniformMatrix4x2fv", 31, -1 }, + // { "glProgramUniformMatrix3x4fv", 31, -1 }, + // { "glProgramUniformMatrix4x3fv", 31, -1 }, + // { 
"glValidateProgramPipeline", 31, -1 }, + // { "glGetProgramPipelineInfoLog", 31, -1 }, + + // We check for the aliased EXT versions in GLES 3 + // { "glProgramUniform1ui", 31, -1 }, + // { "glProgramUniform2ui", 31, -1 }, + // { "glProgramUniform3ui", 31, -1 }, + // { "glProgramUniform4ui", 31, -1 }, + // { "glProgramUniform1uiv", 31, -1 }, + // { "glProgramUniform2uiv", 31, -1 }, + // { "glProgramUniform3uiv", 31, -1 }, + // { "glProgramUniform4uiv", 31, -1 }, + + { "glBindImageTexture", 31, -1 }, + { "glGetBooleani_v", 31, -1 }, + { "glMemoryBarrier", 31, -1 }, + + // FINISHME: This function has not been implemented yet. + // { "glMemoryBarrierByRegion", 31, -1 }, + + { "glTexStorage2DMultisample", 31, -1 }, + { "glGetMultisamplefv", 31, -1 }, + { "glSampleMaski", 31, -1 }, + { "glGetTexLevelParameteriv", 31, -1 }, + { "glGetTexLevelParameterfv", 31, -1 }, + { "glBindVertexBuffer", 31, -1 }, + { "glVertexAttribFormat", 31, -1 }, + { "glVertexAttribIFormat", 31, -1 }, + { "glVertexAttribBinding", 31, -1 }, + { "glVertexBindingDivisor", 31, -1 }, + + { NULL, 0, -1 }, + }; diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 3ea775c0e4a..72b8206ec23 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -1817,9 +1817,12 @@ vbo_initialize_exec_dispatch(const struct gl_context *ctx, SET_DrawElementsInstancedBaseVertexBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseVertexBaseInstance); } - if (ctx->API == API_OPENGL_CORE) { + if (ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) { SET_DrawArraysIndirect(exec, vbo_exec_DrawArraysIndirect); SET_DrawElementsIndirect(exec, vbo_exec_DrawElementsIndirect); + } + + if (ctx->API == API_OPENGL_CORE) { SET_MultiDrawArraysIndirect(exec, vbo_exec_MultiDrawArraysIndirect); SET_MultiDrawElementsIndirect(exec, vbo_exec_MultiDrawElementsIndirect); } From 1fe243938b11be740417cf016d8c50cd69228628 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 28 Apr 2015 13:13:47 
-0700 Subject: [PATCH 333/834] mesa/es3.1: Enable ES 3.1 API and shading language version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a bit of a hack for now. Several of the extensions required for OpenGL ES 3.1 have no support, at all, in Mesa. However, with this patch and a patch to allow MESA_GL_VERSION_OVERRIDE to work with ES contexts, people can begin testing the ES "version" of the functionality that is supported. Signed-off-by: Ian Romanick Reviewed-by: Tapani Pälli --- src/mesa/main/getstring.c | 16 ++++++++++++---- src/mesa/main/version.c | 18 +++++++++++++++++- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c index 1b2c7f054f6..72d99ca4e22 100644 --- a/src/mesa/main/getstring.c +++ b/src/mesa/main/getstring.c @@ -72,10 +72,18 @@ shading_language_version(struct gl_context *ctx) break; case API_OPENGLES2: - return (ctx->Version < 30) - ? (const GLubyte *) "OpenGL ES GLSL ES 1.0.16" - : (const GLubyte *) "OpenGL ES GLSL ES 3.00"; - + switch (ctx->Version) { + case 20: + return (const GLubyte *) "OpenGL ES GLSL ES 1.0.16"; + case 30: + return (const GLubyte *) "OpenGL ES GLSL ES 3.00"; + case 31: + return (const GLubyte *) "OpenGL ES GLSL ES 3.10"; + default: + _mesa_problem(ctx, + "Invalid OpenGL ES version in shading_language_version()"); + return (const GLubyte *) 0; + } case API_OPENGLES: /* fall-through */ diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 699a0de46c2..e817e2d92a0 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -433,7 +433,23 @@ compute_version_es2(const struct gl_extensions *extensions) extensions->EXT_texture_snorm && extensions->NV_primitive_restart && extensions->OES_depth_texture_cube_map); - if (ver_3_0) { + const bool ver_3_1 = (ver_3_0 && + extensions->ARB_arrays_of_arrays && + extensions->ARB_compute_shader && + extensions->ARB_draw_indirect && + false 
/*extensions->ARB_framebuffer_no_attachments*/ && + extensions->ARB_shader_atomic_counters && + extensions->ARB_shader_image_load_store && + false /*extensions->ARB_shader_image_size*/ && + false /*extensions->ARB_shader_storage_buffer_object*/ && + extensions->ARB_shading_language_packing && + extensions->ARB_stencil_texturing && + extensions->ARB_gpu_shader5 && + extensions->EXT_shader_integer_mix); + + if (ver_3_1) { + return 31; + } else if (ver_3_0) { return 30; } else if (ver_2_0) { return 20; From 464c56d3d5ca2c9d6e437e756950f0fa2996d8da Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 26 May 2015 12:14:39 -0700 Subject: [PATCH 334/834] dri_util: Use _mesa_override_gl_version_contextless MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove _mesa_get_gl_version_override. We don't need two functions that do basically the same thing. This change seemed easier (esp. with the next patch) than going the other way. Signed-off-by: Ian Romanick Reviewed-by: Tapani Pälli --- src/mesa/drivers/dri/common/dri_util.c | 18 +++++++++++------- src/mesa/main/version.c | 16 ---------------- src/mesa/main/version.h | 3 --- 3 files changed, 11 insertions(+), 26 deletions(-) diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index d6e875fcfeb..f0e5440c225 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -162,13 +162,17 @@ driCreateNewScreen2(int scrn, int fd, return NULL; } - int gl_version_override = _mesa_get_gl_version_override(); - if (gl_version_override >= 31) { - psp->max_gl_core_version = MAX2(psp->max_gl_core_version, - gl_version_override); - } else { - psp->max_gl_compat_version = MAX2(psp->max_gl_compat_version, - gl_version_override); + struct gl_constants consts = { 0 }; + gl_api api; + int version; + + api = API_OPENGL_COMPAT; + if (_mesa_override_gl_version_contextless(&consts, &api, &version)) { + if (api == 
API_OPENGL_CORE) { + psp->max_gl_core_version = version; + } else { + psp->max_gl_compat_version = version; + } } psp->api_mask = (1 << __DRI_API_OPENGL); diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index e817e2d92a0..668658fdbf7 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -156,22 +156,6 @@ _mesa_override_gl_version(struct gl_context *ctx) } } -/** - * Returns the gl override value - * - * version > 0 indicates there is an override requested - */ -int -_mesa_get_gl_version_override(void) -{ - int version; - bool fwd_context, compat_context; - - get_gl_override(&version, &fwd_context, &compat_context); - - return version; -} - /** * Override the context's GLSL version if the environment variable * MESA_GLSL_VERSION_OVERRIDE is set. Valid values for diff --git a/src/mesa/main/version.h b/src/mesa/main/version.h index 450a0e31d3d..ee7cb7501eb 100644 --- a/src/mesa/main/version.h +++ b/src/mesa/main/version.h @@ -47,7 +47,4 @@ _mesa_override_gl_version(struct gl_context *ctx); extern void _mesa_override_glsl_version(struct gl_constants *consts); -extern int -_mesa_get_gl_version_override(void); - #endif /* VERSION_H */ From 03fd6704db9f1d0f203bf8da18bd587c7e35ce60 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 26 May 2015 12:07:13 -0700 Subject: [PATCH 335/834] mesa: Add support for a new override string MESA_GLES_VERSION_OVERRIDE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The string is only applied when the context is API_OPENGLES2. The bulk of the change is to prevent overriding the context to API_OPENGL_CORE based on the requested version. If the context is API_OPENGL_ES2, don't change it. 
Signed-off-by: Ian Romanick Reviewed-by: Tapani Pälli --- src/mesa/main/version.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 668658fdbf7..409e5ae3cba 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -51,15 +51,20 @@ check_for_ending(const char *string, const char *ending) * fwd_context is only valid if version > 0 */ static void -get_gl_override(int *version, bool *fwd_context, bool *compat_context) +get_gl_override(gl_api api, int *version, bool *fwd_context, + bool *compat_context) { - const char *env_var = "MESA_GL_VERSION_OVERRIDE"; + const char *env_var = (api == API_OPENGL_CORE || api == API_OPENGL_COMPAT) + ? "MESA_GL_VERSION_OVERRIDE" : "MESA_GLES_VERSION_OVERRIDE"; const char *version_str; int major, minor, n; static int override_version = -1; static bool fc_suffix = false; static bool compat_suffix = false; + if (api == API_OPENGLES) + return; + if (override_version < 0) { override_version = 0; @@ -75,7 +80,12 @@ get_gl_override(int *version, bool *fwd_context, bool *compat_context) override_version = 0; } else { override_version = major * 10 + minor; - if (override_version < 30 && fc_suffix) { + + /* There is no such thing as compatibility or forward-compatible for + * OpenGL ES 2.0 or 3.x APIs. 
+ */ + if ((override_version < 30 && fc_suffix) || + (api == API_OPENGLES2 && (fc_suffix || compat_suffix))) { fprintf(stderr, "error: invalid value for %s: %s\n", env_var, version_str); } @@ -130,18 +140,26 @@ _mesa_override_gl_version_contextless(struct gl_constants *consts, int version; bool fwd_context, compat_context; - get_gl_override(&version, &fwd_context, &compat_context); + get_gl_override(*apiOut, &version, &fwd_context, &compat_context); if (version > 0) { *versionOut = version; - if (version >= 30 && fwd_context) { - *apiOut = API_OPENGL_CORE; - consts->ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT; - } else if (version >= 31 && !compat_context) { - *apiOut = API_OPENGL_CORE; - } else { - *apiOut = API_OPENGL_COMPAT; + + /* If the API is a desktop API, adjust the context flags. We may also + * need to modify the API depending on the version. For example, Mesa + * does not support a GL 3.3 compatibility profile. + */ + if (*apiOut == API_OPENGL_CORE || *apiOut == API_OPENGL_COMPAT) { + if (version >= 30 && fwd_context) { + *apiOut = API_OPENGL_CORE; + consts->ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT; + } else if (version >= 31 && !compat_context) { + *apiOut = API_OPENGL_CORE; + } else { + *apiOut = API_OPENGL_COMPAT; + } } + return true; } return false; From 9b5e92f4ccc6ee1cb9caea947f6efaad2b391cf1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 29 Apr 2015 16:12:40 -0700 Subject: [PATCH 336/834] mesa: Allow overriding the version of ES2+ contexts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ian Romanick Reviewed-by: Tapani Pälli --- src/mesa/drivers/dri/common/dri_util.c | 4 ++++ src/mesa/main/context.c | 4 +--- src/mesa/state_tracker/st_manager.c | 3 +-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index f0e5440c225..2d847ef7bc8 100644 --- 
a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -166,6 +166,10 @@ driCreateNewScreen2(int scrn, int fd, gl_api api; int version; + api = API_OPENGLES2; + if (_mesa_override_gl_version_contextless(&consts, &api, &version)) + psp->max_gl_es2_version = version; + api = API_OPENGL_COMPAT; if (_mesa_override_gl_version_contextless(&consts, &api, &version)) { if (api == API_OPENGL_CORE) { diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index e4faf3d462a..8a59b5ed42f 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1169,9 +1169,7 @@ _mesa_initialize_context(struct gl_context *ctx, ctx->HasConfig = GL_FALSE; } - if (_mesa_is_desktop_gl(ctx)) { - _mesa_override_gl_version(ctx); - } + _mesa_override_gl_version(ctx); /* misc one-time initializations */ one_time_init(ctx); diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 0376954f742..a2dee6298fa 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -924,8 +924,7 @@ static unsigned get_version(struct pipe_screen *screen, struct gl_extensions extensions = {0}; GLuint version; - if ((api == API_OPENGL_COMPAT || api == API_OPENGL_CORE) && - _mesa_override_gl_version_contextless(&consts, &api, &version)) { + if (_mesa_override_gl_version_contextless(&consts, &api, &version)) { return version; } From 73cf10e6236fbf119c8262e69cd24f55557116f1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:53:49 -0700 Subject: [PATCH 337/834] Revert "st/mesa: Enable ARB_direct_state_access" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 357bf80caade9e0be20dcc88ec38884e34abc986. 
Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/state_tracker/st_extensions.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 23a45883d9a..b1057f3eadd 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -650,12 +650,6 @@ void st_init_extensions(struct pipe_screen *screen, ARRAY_SIZE(vertex_mapping), PIPE_BUFFER, PIPE_BIND_VERTEX_BUFFER); - /* ARB_direct_state_access requires OpenGL 2.0. Assume that all drivers - * that support NPOT textures are able to support GL 2.0. - */ - if (extensions->ARB_texture_non_power_of_two) - extensions->ARB_direct_state_access = GL_TRUE; - if (extensions->ARB_stencil_texturing) extensions->ARB_texture_stencil8 = GL_TRUE; From 4bc00b1a4b34abc3e6e26f126686608ccfa33f52 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:53:58 -0700 Subject: [PATCH 338/834] Revert "i965: Enable ARB_direct_state_access" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit a57feba0a35de35728269aeb26b039e4f2393d69. 
Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/drivers/dri/i965/intel_extensions.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 18b69a0fd8b..cafb77455d7 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -183,7 +183,6 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_depth_buffer_float = true; ctx->Extensions.ARB_depth_clamp = true; ctx->Extensions.ARB_depth_texture = true; - ctx->Extensions.ARB_direct_state_access = true; ctx->Extensions.ARB_draw_elements_base_vertex = true; ctx->Extensions.ARB_draw_instanced = true; ctx->Extensions.ARB_ES2_compatibility = true; From f1fcf79e3c8e4f3594dc1b6d268430f8e8d4eb97 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:54:08 -0700 Subject: [PATCH 339/834] Revert "i915: Enable ARB_direct_state_access" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 121030eed8fc41789d2f4f7517bbc0dd6199667b. 
Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/drivers/dri/i915/intel_extensions.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/dri/i915/intel_extensions.c b/src/mesa/drivers/dri/i915/intel_extensions.c index 590c6efcccd..ab7820f1232 100644 --- a/src/mesa/drivers/dri/i915/intel_extensions.c +++ b/src/mesa/drivers/dri/i915/intel_extensions.c @@ -83,7 +83,6 @@ intelInitExtensions(struct gl_context *ctx) if (intel->gen >= 3) { ctx->Extensions.ARB_ES2_compatibility = true; ctx->Extensions.ARB_depth_texture = true; - ctx->Extensions.ARB_direct_state_access = true; ctx->Extensions.ARB_fragment_program = true; ctx->Extensions.ARB_shadow = true; ctx->Extensions.ARB_texture_non_power_of_two = true; From a9f678a8f4d4f9806dc4e931477fad300c61b4a1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:54:11 -0700 Subject: [PATCH 340/834] Revert "mesa: Add ARB_direct_state_access checks in query object functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit d3368e0c9e27ced6059eb2ecdf2aa999a00e90b0. 
Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/queryobj.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c index 2784b4c0532..5ff1b953231 100644 --- a/src/mesa/main/queryobj.c +++ b/src/mesa/main/queryobj.c @@ -284,13 +284,6 @@ _mesa_CreateQueries(GLenum target, GLsizei n, GLuint *ids) { GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCreateQueries(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - switch (target) { case GL_SAMPLES_PASSED: case GL_ANY_SAMPLES_PASSED: From a9dcf45cd88b6e4d7816f45756d0b96d9c77cffe Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:54:15 -0700 Subject: [PATCH 341/834] Revert "mesa: Add ARB_direct_state_access checks in program pipeline functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit bebf3c6ab314bde05ac5a3b4d3e63fd36243c58e. 
Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/pipelineobj.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index a33cdd139c8..0fefa7d568b 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -553,12 +553,6 @@ _mesa_CreateProgramPipelines(GLsizei n, GLuint *pipelines) { GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glCreateProgramPipelines(" - "GL_ARB_direct_state_access is not supported)"); - return; - } - create_program_pipelines(ctx, n, pipelines, true); } From ae5457754492b594c55911433a9b3675216c46c1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:54:18 -0700 Subject: [PATCH 342/834] Revert "mesa: Add ARB_direct_state_access checks in sampler object functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 9e7149c8986348bf9567f049444783ef52775f4e. 
Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/samplerobj.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c index 60711a5b5e3..a3aacc66aa3 100644 --- a/src/mesa/main/samplerobj.c +++ b/src/mesa/main/samplerobj.c @@ -221,13 +221,6 @@ void GLAPIENTRY _mesa_CreateSamplers(GLsizei count, GLuint *samplers) { GET_CURRENT_CONTEXT(ctx); - - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glCreateSamplers(" - "GL_ARB_direct_state_access is not supported)"); - return; - } - create_samplers(ctx, count, samplers, "glCreateSamplers"); } From 92e362191e6c1c15e3944464fbf6bbda9e7d9892 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:54:21 -0700 Subject: [PATCH 343/834] Revert "mesa: Add ARB_direct_state_access checks in VAO functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 36b05793372b86b914d9b95d0188f5f387e01d68. 
Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/arrayobj.c | 22 -------------- src/mesa/main/varray.c | 64 ---------------------------------------- 2 files changed, 86 deletions(-) diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index 320f435ea94..7c4004043de 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -617,14 +617,6 @@ void GLAPIENTRY _mesa_CreateVertexArrays(GLsizei n, GLuint *arrays) { GET_CURRENT_CONTEXT(ctx); - - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCreateVertexArrays(GL_ARB_direct_state_access " - "is not supported"); - return; - } - gen_vertex_arrays(ctx, n, arrays, true, "glCreateVertexArrays"); } @@ -667,13 +659,6 @@ _mesa_VertexArrayElementBuffer(GLuint vaobj, GLuint buffer) struct gl_vertex_array_object *vao; struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glVertexArrayElementBuffer(GL_ARB_direct_state_access " - "is not supported"); - return; - } - ASSERT_OUTSIDE_BEGIN_END(ctx); /* The GL_ARB_direct_state_access specification says: @@ -710,13 +695,6 @@ _mesa_GetVertexArrayiv(GLuint vaobj, GLenum pname, GLint *param) ASSERT_OUTSIDE_BEGIN_END(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetVertexArrayiv(GL_ARB_direct_state_access " - "is not supported"); - return; - } - /* The GL_ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated if is not diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index da6bbce52aa..7389037ae85 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -777,13 +777,6 @@ _mesa_EnableVertexArrayAttrib(GLuint vaobj, GLuint index) GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glEnableVertexArrayAttrib(GL_ARB_direct_state_access " 
- "is not supported"); - return; - } - /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by EnableVertexArrayAttrib @@ -837,13 +830,6 @@ _mesa_DisableVertexArrayAttrib(GLuint vaobj, GLuint index) GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glDisableVertexArrayAttrib(GL_ARB_direct_state_access " - "is not supported"); - return; - } - /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by EnableVertexArrayAttrib @@ -1108,13 +1094,6 @@ _mesa_GetVertexArrayIndexediv(GLuint vaobj, GLuint index, GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetVertexArrayIndexediv(GL_ARB_direct_state_access " - "is not supported"); - return; - } - /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated if is not @@ -1178,14 +1157,6 @@ _mesa_GetVertexArrayIndexed64iv(GLuint vaobj, GLuint index, GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetVertexArrayIndexed64iv(GL_ARB_direct_state_access " - "is not supported"); - return; - } - - /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated if is not @@ -1774,13 +1745,6 @@ _mesa_VertexArrayVertexBuffer(GLuint vaobj, GLuint bindingIndex, GLuint buffer, GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glVertexArrayVertexBuffer(GL_ARB_direct_state_access " - "is not supported"); - return; - } - /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by VertexArrayVertexBuffer @@ -1946,14 +1910,6 @@ 
_mesa_VertexArrayVertexBuffers(GLuint vaobj, GLuint first, GLsizei count, GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glVertexArrayVertexBuffers(GL_ARB_direct_state_access " - "is not supported"); - return; - } - - /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by VertexArrayVertexBuffer @@ -2062,12 +2018,6 @@ vertex_array_attrib_format(GLuint vaobj, GLuint attribIndex, GLint size, GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(GL_ARB_direct_state_access is not supported", func); - return; - } - ASSERT_OUTSIDE_BEGIN_END(ctx); /* The ARB_direct_state_access spec says: @@ -2205,13 +2155,6 @@ _mesa_VertexArrayAttribBinding(GLuint vaobj, GLuint attribIndex, GLuint bindingI GET_CURRENT_CONTEXT(ctx); struct gl_vertex_array_object *vao; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glVertexArrayAttribBinding(GL_ARB_direct_state_access " - "is not supported"); - return; - } - /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by VertexArrayAttribBinding @@ -2286,13 +2229,6 @@ _mesa_VertexArrayBindingDivisor(GLuint vaobj, GLuint bindingIndex, GLuint diviso struct gl_vertex_array_object *vao; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glVertexArrayBindingDivisor(GL_ARB_direct_state_access " - "is not supported"); - return; - } - /* The ARB_direct_state_access specification says: * * "An INVALID_OPERATION error is generated by VertexArrayBindingDivisor From 1ac6a8f1d1952a20d54df3e513c253d7988402ac Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:54:25 -0700 Subject: [PATCH 344/834] Revert "mesa: Add 
ARB_direct_state_access checks in texture functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 8940957238e8584ce27295791cee4cc3d6f7cf1e. Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/genmipmap.c | 7 ---- src/mesa/main/texgetimage.c | 14 ------- src/mesa/main/teximage.c | 74 -------------------------------- src/mesa/main/texobj.c | 14 ------- src/mesa/main/texparam.c | 84 ------------------------------------- src/mesa/main/texstorage.c | 7 ---- 6 files changed, 200 deletions(-) diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c index 32b9460ad07..9aef090194e 100644 --- a/src/mesa/main/genmipmap.c +++ b/src/mesa/main/genmipmap.c @@ -158,13 +158,6 @@ _mesa_GenerateTextureMipmap(GLuint texture) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGenerateTextureMipmap(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = _mesa_lookup_texture_err(ctx, texture, "glGenerateTextureMipmap"); if (!texObj) return; diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index f582a7f78b0..92b4d6795c6 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -1108,13 +1108,6 @@ _mesa_GetTextureImage(GLuint texture, GLint level, GLenum format, GLenum err; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTextureImage(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - /* * This has been moved here because a format/type mismatch can cause a NULL * texImage object, which in turn causes the mismatch error to be @@ -1351,13 +1344,6 @@ _mesa_GetCompressedTextureImage(GLuint texture, GLint level, GLint image_stride; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - 
"glGetCompressedTextureImage(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = _mesa_lookup_texture_err(ctx, texture, "glGetCompressedTextureImage"); if (!texObj) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 7616fd7cec9..7bc1da7f805 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -3624,13 +3624,6 @@ texturesubimage(struct gl_context *ctx, GLuint dims, _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type), pixels); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureSubImage%uD(GL_ARB_direct_state_access " - "is not supported)", dims); - return; - } - /* Get the texture object by Name. */ texObj = _mesa_lookup_texture(ctx, texture); if (!texObj) { @@ -4190,12 +4183,6 @@ _mesa_CopyTextureSubImage1D(GLuint texture, GLint level, const char *self = "glCopyTextureSubImage1D"; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(GL_ARB_direct_state_access is not supported)", self); - return; - } - texObj = _mesa_lookup_texture_err(ctx, texture, self); if (!texObj) return; @@ -4220,12 +4207,6 @@ _mesa_CopyTextureSubImage2D(GLuint texture, GLint level, const char *self = "glCopyTextureSubImage2D"; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(GL_ARB_direct_state_access is not supported)", self); - return; - } - texObj = _mesa_lookup_texture_err(ctx, texture, self); if (!texObj) return; @@ -4253,12 +4234,6 @@ _mesa_CopyTextureSubImage3D(GLuint texture, GLint level, const char *self = "glCopyTextureSubImage3D"; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(GL_ARB_direct_state_access is not supported)", self); - return; - } - texObj = _mesa_lookup_texture_err(ctx, texture, self); if (!texObj) return; @@ -4854,13 +4829,6 
@@ _mesa_CompressedTextureSubImage1D(GLuint texture, GLint level, GLint xoffset, GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCompressedTextureSubImage1D(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = _mesa_lookup_texture_err(ctx, texture, "glCompressedTextureSubImage1D"); if (!texObj) @@ -4939,13 +4907,6 @@ _mesa_CompressedTextureSubImage2D(GLuint texture, GLint level, GLint xoffset, GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCompressedTextureSubImage2D(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = _mesa_lookup_texture_err(ctx, texture, "glCompressedTextureSubImage2D"); if (!texObj) @@ -5024,13 +4985,6 @@ _mesa_CompressedTextureSubImage3D(GLuint texture, GLint level, GLint xoffset, GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCompressedTextureSubImage3D(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = _mesa_lookup_texture_err(ctx, texture, "glCompressedTextureSubImage3D"); if (!texObj) @@ -5515,13 +5469,6 @@ _mesa_TextureBuffer(GLuint texture, GLenum internalFormat, GLuint buffer) GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureBuffer(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - if (buffer) { bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glTextureBuffer"); if (!bufObj) @@ -5550,13 +5497,6 @@ _mesa_TextureBufferRange(GLuint texture, GLenum internalFormat, GLuint buffer, GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureBufferRange(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - if (buffer) { bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, 
"glTextureBufferRange"); @@ -5861,13 +5801,6 @@ _mesa_TextureStorage2DMultisample(GLuint texture, GLsizei samples, struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureStorage2DMultisample(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = _mesa_lookup_texture_err(ctx, texture, "glTextureStorage2DMultisample"); if (!texObj) @@ -5888,13 +5821,6 @@ _mesa_TextureStorage3DMultisample(GLuint texture, GLsizei samples, struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureStorage3DMultisample(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - /* Get the texture object by Name. */ texObj = _mesa_lookup_texture_err(ctx, texture, "glTextureStorage3DMultisample"); diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index d51e6954ba0..c563f1e7434 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -1317,13 +1317,6 @@ _mesa_CreateTextures(GLenum target, GLsizei n, GLuint *textures) GLint targetIndex; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCreateTextures(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - /* * The 4.5 core profile spec (30.10.2014) doesn't specify what * glCreateTextures should do with invalid targets, which was probably an @@ -1815,13 +1808,6 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture) _mesa_debug(ctx, "glBindTextureUnit %s %d\n", _mesa_lookup_enum_by_nr(GL_TEXTURE0+unit), (GLint) texture); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glBindTextureUnit(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - /* Section 8.1 (Texture Objects) of the OpenGL 4.5 core profile spec * (20141030) says: * "When texture 
is zero, each of the targets enumerated at the diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 1fa5830026a..d74134f41b1 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1108,13 +1108,6 @@ _mesa_TextureParameterfv(GLuint texture, GLenum pname, const GLfloat *params) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureParameterfv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. */ @@ -1131,13 +1124,6 @@ _mesa_TextureParameterf(GLuint texture, GLenum pname, GLfloat param) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureParameterf(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. */ @@ -1154,13 +1140,6 @@ _mesa_TextureParameteri(GLuint texture, GLenum pname, GLint param) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureParameteri(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. */ @@ -1178,13 +1157,6 @@ _mesa_TextureParameteriv(GLuint texture, GLenum pname, struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureParameteriv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. 
*/ @@ -1202,13 +1174,6 @@ _mesa_TextureParameterIiv(GLuint texture, GLenum pname, const GLint *params) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureParameterIiv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. */ @@ -1226,13 +1191,6 @@ _mesa_TextureParameterIuiv(GLuint texture, GLenum pname, const GLuint *params) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureParameterIuiv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = get_texobj_by_name(ctx, texture, GL_FALSE); if (!texObj) { /* User passed a non-generated name. */ @@ -1692,13 +1650,6 @@ _mesa_GetTextureLevelParameterfv(GLuint texture, GLint level, GLint iparam; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTextureLevelParameterfv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = _mesa_lookup_texture_err(ctx, texture, "glGetTextureLevelParameterfv"); if (!texObj) @@ -1717,13 +1668,6 @@ _mesa_GetTextureLevelParameteriv(GLuint texture, GLint level, struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTextureLevelParameteriv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = _mesa_lookup_texture_err(ctx, texture, "glGetTextureLevelParameteriv"); if (!texObj) @@ -2283,13 +2227,6 @@ _mesa_GetTextureParameterfv(GLuint texture, GLenum pname, GLfloat *params) struct gl_texture_object *obj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, 
- "glGetTextureParameterfv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - obj = get_texobj_by_name(ctx, texture, GL_TRUE); if (!obj) { /* User passed a non-generated name. */ @@ -2307,13 +2244,6 @@ _mesa_GetTextureParameteriv(GLuint texture, GLenum pname, GLint *params) struct gl_texture_object *obj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTextureParameteriv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - obj = get_texobj_by_name(ctx, texture, GL_TRUE); if (!obj) { /* User passed a non-generated name. */ @@ -2331,13 +2261,6 @@ _mesa_GetTextureParameterIiv(GLuint texture, GLenum pname, GLint *params) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTextureParameterIiv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = get_texobj_by_name(ctx, texture, GL_TRUE); if (!texObj) { /* User passed a non-generated name. */ @@ -2356,13 +2279,6 @@ _mesa_GetTextureParameterIuiv(GLuint texture, GLenum pname, GLuint *params) struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTextureParameterIuiv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - texObj = get_texobj_by_name(ctx, texture, GL_TRUE); if (!texObj) { /* User passed a non-generated name. 
*/ diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c index dee74a825ea..53cb2c091f8 100644 --- a/src/mesa/main/texstorage.c +++ b/src/mesa/main/texstorage.c @@ -507,13 +507,6 @@ texturestorage(GLuint dims, GLuint texture, GLsizei levels, _mesa_lookup_enum_by_nr(internalformat), width, height, depth); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureStorage%uD(GL_ARB_direct_state_access " - "is not supported)", dims); - return; - } - /* Check the format to make sure it is sized. */ if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) { _mesa_error(ctx, GL_INVALID_ENUM, From f3e8596a371c3708e9d9d68a021c39982c676cf1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:54:29 -0700 Subject: [PATCH 345/834] Revert "mesa: Add ARB_direct_state_access checks in renderbuffer functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit cb49940766b581c6656473d89c221653c69fa0f9. 
Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/fbobject.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 1859c277293..de8af4677d1 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1489,14 +1489,6 @@ void GLAPIENTRY _mesa_CreateRenderbuffers(GLsizei n, GLuint *renderbuffers) { GET_CURRENT_CONTEXT(ctx); - - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCreateRenderbuffers(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - create_render_buffers(ctx, n, renderbuffers, true); } @@ -1937,12 +1929,6 @@ renderbuffer_storage_named(GLuint renderbuffer, GLenum internalFormat, { GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(GL_ARB_direct_state_access is not supported)", func); - return; - } - if (MESA_VERBOSE & VERBOSE_API) { if (samples == NO_SAMPLES) _mesa_debug(ctx, "%s(%u, %s, %d, %d)\n", @@ -2197,13 +2183,6 @@ _mesa_GetNamedRenderbufferParameteriv(GLuint renderbuffer, GLenum pname, { GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetNamedRenderbufferParameteriv(" - "GL_ARB_direct_state_access is not supported)"); - return; - } - struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, renderbuffer); if (!rb || rb == &DummyRenderbuffer) { /* ID was reserved, but no real renderbuffer object made yet */ From 8bcd14fab9a86276980a8859740999a1db4c55d5 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:54:35 -0700 Subject: [PATCH 346/834] Revert "mesa: Add ARB_direct_state_access checks in FBO functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 6ad0b7e07a0445e9e0f368e079c4f7b8a6757bb3. 
Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/blit.c | 7 ----- src/mesa/main/buffers.c | 21 ------------- src/mesa/main/clear.c | 32 ------------------- src/mesa/main/fbobject.c | 67 ---------------------------------------- 4 files changed, 127 deletions(-) diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c index fac97245082..db8fee5a414 100644 --- a/src/mesa/main/blit.c +++ b/src/mesa/main/blit.c @@ -540,13 +540,6 @@ _mesa_BlitNamedFramebuffer(GLuint readFramebuffer, GLuint drawFramebuffer, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *readFb, *drawFb; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitNamedFramebuffer(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBlitNamedFramebuffer(%u %u %d, %d, %d, %d, " diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index c83459addf7..0536266d756 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -303,13 +303,6 @@ _mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf) GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glNamedFramebufferDrawBuffer(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - if (framebuffer) { fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, "glNamedFramebufferDrawBuffer"); @@ -520,13 +513,6 @@ _mesa_NamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glNamedFramebufferDrawBuffers(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - if (framebuffer) { fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, "glNamedFramebufferDrawBuffers"); @@ -764,13 +750,6 @@ _mesa_NamedFramebufferReadBuffer(GLuint framebuffer, GLenum src) 
GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glNamedFramebufferReadBuffer(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - if (framebuffer) { fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, "glNamedFramebufferReadBuffer"); diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c index c6999f7fdb6..426caea4709 100644 --- a/src/mesa/main/clear.c +++ b/src/mesa/main/clear.c @@ -412,14 +412,6 @@ _mesa_ClearNamedFramebufferiv(GLuint framebuffer, GLenum buffer, { GLint oldfb; - GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glClearNamedFramebufferiv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); _mesa_ClearBufferiv(buffer, drawbuffer, value); @@ -510,14 +502,6 @@ _mesa_ClearNamedFramebufferuiv(GLuint framebuffer, GLenum buffer, { GLint oldfb; - GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glClearNamedFramebufferuiv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); _mesa_ClearBufferuiv(buffer, drawbuffer, value); @@ -629,14 +613,6 @@ _mesa_ClearNamedFramebufferfv(GLuint framebuffer, GLenum buffer, { GLint oldfb; - GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glClearNamedFramebufferfv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); _mesa_ClearBufferfv(buffer, drawbuffer, value); @@ -719,14 +695,6 @@ _mesa_ClearNamedFramebufferfi(GLuint 
framebuffer, GLenum buffer, { GLint oldfb; - GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glClearNamedFramebufferfi(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); _mesa_ClearBufferfi(buffer, 0, depth, stencil); diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index de8af4677d1..5b8c52ab2ad 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2454,12 +2454,6 @@ create_framebuffers(GLsizei n, GLuint *framebuffers, bool dsa) const char *func = dsa ? "glCreateFramebuffers" : "glGenFramebuffers"; - if (dsa && !ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(GL_ARB_direct_state_access is not supported)", func); - return; - } - if (n < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", func); return; @@ -2558,13 +2552,6 @@ _mesa_CheckNamedFramebufferStatus(GLuint framebuffer, GLenum target) struct gl_framebuffer *fb; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCheckNamedFramebufferStatus(GL_ARB_direct_state_access " - "is not supported)"); - return 0; - } - /* Validate the target (for conformance's sake) and grab a reference to the * default framebuffer in case framebuffer = 0. 
* Section 9.4 Framebuffer Completeness of the OpenGL 4.5 core spec @@ -3110,12 +3097,6 @@ _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment, const char *func = "glNamedFramebufferTextureLayer"; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(GL_ARB_direct_state_access is not supported)", func); - return; - } - /* Get the framebuffer object */ fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, func); if (!fb) @@ -3201,12 +3182,6 @@ _mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment, const char *func = "glNamedFramebufferTexture"; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(GL_ARB_direct_state_access is not supported)", func); - return; - } - if (!_mesa_has_geometry_shaders(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, "unsupported function (glNamedFramebufferTexture) called"); @@ -3332,13 +3307,6 @@ _mesa_NamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment, struct gl_renderbuffer *rb; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glNamedFramebufferRenderbuffer(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, "glNamedFramebufferRenderbuffer"); if (!fb) @@ -3673,13 +3641,6 @@ _mesa_GetNamedFramebufferAttachmentParameteriv(GLuint framebuffer, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *buffer; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetNamedFramebufferAttachmentParameteriv(" - "GL_ARB_direct_state_access is not supported)"); - return; - } - if (framebuffer) { buffer = _mesa_lookup_framebuffer_err(ctx, framebuffer, "glGetNamedFramebufferAttachmentParameteriv"); @@ -3712,13 +3673,6 @@ _mesa_NamedFramebufferParameteri(GLuint framebuffer, GLenum pname, (void) pname; (void) param; - if 
(!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glNamedFramebufferParameteri(" - "GL_ARB_direct_state_access is not supported)"); - return; - } - _mesa_error(ctx, GL_INVALID_OPERATION, "glNamedFramebufferParameteri not supported " "(ARB_framebuffer_no_attachments not implemented)"); @@ -3735,13 +3689,6 @@ _mesa_GetNamedFramebufferParameteriv(GLuint framebuffer, GLenum pname, (void) pname; (void) param; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glNamedFramebufferParameteriv(" - "GL_ARB_direct_state_access is not supported)"); - return; - } - _mesa_error(ctx, GL_INVALID_OPERATION, "glGetNamedFramebufferParameteriv not supported " "(ARB_framebuffer_no_attachments not implemented)"); @@ -3910,13 +3857,6 @@ _mesa_InvalidateNamedFramebufferSubData(GLuint framebuffer, struct gl_framebuffer *fb; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glInvalidateNamedFramebufferSubData(" - "GL_ARB_direct_state_access is not supported)"); - return; - } - /* The OpenGL 4.5 core spec (02.02.2015) says (in Section 17.4 Whole * Framebuffer Operations, PDF page 522): "If framebuffer is zero, the * default draw framebuffer is affected." @@ -3978,13 +3918,6 @@ _mesa_InvalidateNamedFramebufferData(GLuint framebuffer, struct gl_framebuffer *fb; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glInvalidateNamedFramebufferData(" - "GL_ARB_direct_state_access is not supported)"); - return; - } - /* The OpenGL 4.5 core spec (02.02.2015) says (in Section 17.4 Whole * Framebuffer Operations, PDF page 522): "If framebuffer is zero, the * default draw framebuffer is affected." 
From cab233f277936f4cdc49aa0bbfc7ed1a85c925f1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:54:39 -0700 Subject: [PATCH 347/834] Revert "mesa: Add ARB_direct_state_access checks in buffer object functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 339ed0984d4f54fca91235a1df2ce3a850f6123f. Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/bufferobj.c | 105 -------------------------------------- 1 file changed, 105 deletions(-) diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 660bc94892e..66dee680258 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -1303,12 +1303,6 @@ create_buffers(GLsizei n, GLuint *buffers, bool dsa) const char *func = dsa ? "glCreateBuffers" : "glGenBuffers"; - if (dsa && !ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(GL_ARB_direct_state_access is not supported)", func); - return; - } - if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "%s(%d)\n", func, n); @@ -1483,13 +1477,6 @@ _mesa_NamedBufferStorage(GLuint buffer, GLsizeiptr size, const GLvoid *data, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glNamedBufferStorage(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glNamedBufferStorage"); if (!bufObj) return; @@ -1616,13 +1603,6 @@ _mesa_NamedBufferData(GLuint buffer, GLsizeiptr size, const GLvoid *data, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glNamedBufferData(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glNamedBufferData"); if (!bufObj) return; @@ -1693,13 +1673,6 @@ _mesa_NamedBufferSubData(GLuint 
buffer, GLintptr offset, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glNamedBufferSubData(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glNamedBufferSubData"); if (!bufObj) return; @@ -1737,13 +1710,6 @@ _mesa_GetNamedBufferSubData(GLuint buffer, GLintptr offset, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetNamedBufferSubData(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetNamedBufferSubData"); if (!bufObj) @@ -1839,13 +1805,6 @@ _mesa_ClearNamedBufferData(GLuint buffer, GLenum internalformat, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glClearNamedBufferData(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glClearNamedBufferData"); if (!bufObj) return; @@ -1883,13 +1842,6 @@ _mesa_ClearNamedBufferSubData(GLuint buffer, GLenum internalformat, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glClearNamedBufferSubData(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glClearNamedBufferSubData"); if (!bufObj) @@ -1978,13 +1930,6 @@ _mesa_UnmapNamedBuffer(GLuint buffer) GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glUnmapNamedBuffer(GL_ARB_direct_state_access " - "is not supported)"); - return GL_FALSE; - } - bufObj = _mesa_lookup_bufferobj_err(ctx, 
buffer, "glUnmapNamedBuffer"); if (!bufObj) return GL_FALSE; @@ -2094,13 +2039,6 @@ _mesa_GetNamedBufferParameteriv(GLuint buffer, GLenum pname, GLint *params) struct gl_buffer_object *bufObj; GLint64 parameter; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetNamedBufferParameteriv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetNamedBufferParameteriv"); if (!bufObj) @@ -2121,13 +2059,6 @@ _mesa_GetNamedBufferParameteri64v(GLuint buffer, GLenum pname, struct gl_buffer_object *bufObj; GLint64 parameter; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetNamedBufferParameteri64v(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetNamedBufferParameteri64v"); if (!bufObj) @@ -2167,13 +2098,6 @@ _mesa_GetNamedBufferPointerv(GLuint buffer, GLenum pname, GLvoid **params) GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetNamedBufferPointerv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - if (pname != GL_BUFFER_MAP_POINTER) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetNamedBufferPointerv(pname != " "GL_BUFFER_MAP_POINTER)"); @@ -2288,13 +2212,6 @@ _mesa_CopyNamedBufferSubData(GLuint readBuffer, GLuint writeBuffer, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *src, *dst; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCopyNamedBufferSubData(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - src = _mesa_lookup_bufferobj_err(ctx, readBuffer, "glCopyNamedBufferSubData"); if (!src) @@ -2513,13 +2430,6 @@ _mesa_MapNamedBufferRange(GLuint buffer, GLintptr offset, GLsizeiptr length, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - if 
(!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapNamedBufferRange(GL_ARB_direct_state_access " - "is not supported)"); - return NULL; - } - if (!ctx->Extensions.ARB_map_buffer_range) { _mesa_error(ctx, GL_INVALID_OPERATION, "glMapNamedBufferRange(" @@ -2587,13 +2497,6 @@ _mesa_MapNamedBuffer(GLuint buffer, GLenum access) struct gl_buffer_object *bufObj; GLbitfield accessFlags; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapNamedBuffer(GL_ARB_direct_state_access " - "is not supported)"); - return NULL; - } - if (!get_map_buffer_access_flags(ctx, access, &accessFlags)) { _mesa_error(ctx, GL_INVALID_ENUM, "glMapNamedBuffer(invalid access)"); return NULL; @@ -2684,14 +2587,6 @@ _mesa_FlushMappedNamedBufferRange(GLuint buffer, GLintptr offset, GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFlushMappedNamedBufferRange(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - - bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glFlushMappedNamedBufferRange"); if (!bufObj) From 90e98ea215906bb7e9ecadc4d30d2718ba2186ad Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:54:55 -0700 Subject: [PATCH 348/834] Revert "mesa: Add ARB_direct_state_access checks in XFB functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7d212765a470972f4712e42caf6406b257220369. 
Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/transformfeedback.c | 42 ------------------------------- 1 file changed, 42 deletions(-) diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index 642fa96477a..103011ce572 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -706,13 +706,6 @@ _mesa_TransformFeedbackBufferBase(GLuint xfb, GLuint index, GLuint buffer) struct gl_transform_feedback_object *obj; struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTransformFeedbackBufferBase(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - obj = lookup_transform_feedback_object_err(ctx, xfb, "glTransformFeedbackBufferBase"); if(!obj) { @@ -736,13 +729,6 @@ _mesa_TransformFeedbackBufferRange(GLuint xfb, GLuint index, GLuint buffer, struct gl_transform_feedback_object *obj; struct gl_buffer_object *bufObj; - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTransformFeedbackBufferRange(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - obj = lookup_transform_feedback_object_err(ctx, xfb, "glTransformFeedbackBufferRange"); if(!obj) { @@ -1059,13 +1045,6 @@ _mesa_CreateTransformFeedbacks(GLsizei n, GLuint *names) { GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCreateTransformFeedbacks(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - create_transform_feedbacks(ctx, n, names, true); } @@ -1236,13 +1215,6 @@ _mesa_GetTransformFeedbackiv(GLuint xfb, GLenum pname, GLint *param) struct gl_transform_feedback_object *obj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTransformFeedbackiv(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - obj = 
lookup_transform_feedback_object_err(ctx, xfb, "glGetTransformFeedbackiv"); if(!obj) { @@ -1269,13 +1241,6 @@ _mesa_GetTransformFeedbacki_v(GLuint xfb, GLenum pname, GLuint index, struct gl_transform_feedback_object *obj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTransformFeedbacki_v(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - obj = lookup_transform_feedback_object_err(ctx, xfb, "glGetTransformFeedbacki_v"); if(!obj) { @@ -1305,13 +1270,6 @@ _mesa_GetTransformFeedbacki64_v(GLuint xfb, GLenum pname, GLuint index, struct gl_transform_feedback_object *obj; GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_direct_state_access) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTransformFeedbacki64_v(GL_ARB_direct_state_access " - "is not supported)"); - return; - } - obj = lookup_transform_feedback_object_err(ctx, xfb, "glGetTransformFeedbacki64_v"); if(!obj) { From 832ea2345a96388950bb39ce8a2e4ca8bfdb4fe5 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 20 May 2015 17:19:29 -0700 Subject: [PATCH 349/834] mesa: Use the profile instead of an extension bit to validate GL_TEXTURE_CUBE_MAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The extension on which this depends will always be enabled in core profile, and the extension bit is about to be removed. Signed-off-by: Ian Romanick Reviewed-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/fbobject.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 5b8c52ab2ad..c5a702636a8 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2707,6 +2707,10 @@ check_texture_target(struct gl_context *ctx, GLenum target, /* We're being called by glFramebufferTextureLayer(). * The only legal texture types for that function are 3D, * cube-map, and 1D/2D/cube-map array textures. 
+ * + * We don't need to check for GL_ARB_texture_cube_map_array because the + * application wouldn't have been able to create a texture with a + * GL_TEXTURE_CUBE_MAP_ARRAY target if the extension were not enabled. */ switch (target) { case GL_TEXTURE_3D: @@ -2716,10 +2720,13 @@ check_texture_target(struct gl_context *ctx, GLenum target, case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return true; case GL_TEXTURE_CUBE_MAP: - /* This target is valid in TextureLayer when ARB_direct_state_access - * or OpenGL 4.5 is supported. + /* We don't need to check the extension (GL_ARB_direct_state_access) or + * GL version (4.5) for GL_TEXTURE_CUBE_MAP because DSA is always + * enabled in core profile. This can be called from + * _mesa_FramebufferTextureLayer in compatibility profile (OpenGL 3.0), + * so we do have to check the profile. */ - return ctx->Extensions.ARB_direct_state_access; + return ctx->API == API_OPENGL_CORE; } _mesa_error(ctx, GL_INVALID_OPERATION, From 5c4aab58ee79a8bfa3d96f3ec442f37da587ff45 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:55:04 -0700 Subject: [PATCH 350/834] Revert "mesa: Add an extension flag for ARB_direct_state_access" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 30dcaaec356cc117d7227c6680620cd50ff534e7. 
Acked-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/extensions.c | 2 +- src/mesa/main/mtypes.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index c82416aa072..f7ce0642aef 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -104,7 +104,7 @@ static const struct extension extension_table[] = { { "GL_ARB_depth_clamp", o(ARB_depth_clamp), GL, 2003 }, { "GL_ARB_depth_texture", o(ARB_depth_texture), GLL, 2001 }, { "GL_ARB_derivative_control", o(ARB_derivative_control), GL, 2014 }, - { "GL_ARB_direct_state_access", o(ARB_direct_state_access), GL, 2014 }, + { "GL_ARB_direct_state_access", o(dummy_false), GL, 2014 }, { "GL_ARB_draw_buffers", o(dummy_true), GL, 2002 }, { "GL_ARB_draw_buffers_blend", o(ARB_draw_buffers_blend), GL, 2009 }, { "GL_ARB_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), GL, 2009 }, diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 83425176a3f..737f0be6d62 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3621,7 +3621,6 @@ struct gl_extensions GLboolean ARB_depth_clamp; GLboolean ARB_depth_texture; GLboolean ARB_derivative_control; - GLboolean ARB_direct_state_access; GLboolean ARB_draw_buffers_blend; GLboolean ARB_draw_elements_base_vertex; GLboolean ARB_draw_indirect; From f20899b7276b73e1b60c3ed8d8abdf959e787c0c Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 19 May 2015 11:24:26 -0700 Subject: [PATCH 351/834] glapi: Store exec table version info outside the XML Currently only the functions that are exclusive to core-profile are implemented. The remainder continue to live in the XML. Additional functions can be moved later. The functions for GL_ARB_draw_indirect and GL_ARB_multi_draw_indirect are put in the dispatch table inside the VBO module, so they do not need to be moved over. The diff of src/mesa/main/api_exec.c before and after this patch is as expected. 
All of the functions listed in apiexec.py moved out of a 'if (_mesa_is_desktop(ctx))' block into a new 'if (ctx->API == API_OPENGL_CORE)' block. v2: Remove stray shebang line in apiexec.py. Suggested by Ilia. Signed-off-by: Ian Romanick Reviewed-by: Ilia Mirkin Cc: Dave Airlie Cc: Dylan Baker Cc: "10.6" --- src/mapi/glapi/gen/Makefile.am | 3 +- src/mapi/glapi/gen/apiexec.py | 140 +++++++++++++++++++++++++++++++ src/mapi/glapi/gen/gl_genexec.py | 54 +++++++++--- 3 files changed, 185 insertions(+), 12 deletions(-) create mode 100644 src/mapi/glapi/gen/apiexec.py diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am index d7742f7b9d4..34602579c8a 100644 --- a/src/mapi/glapi/gen/Makefile.am +++ b/src/mapi/glapi/gen/Makefile.am @@ -61,6 +61,7 @@ EXTRA_DIST= \ $(MESA_GLAPI_DIR)/glapi_x86-64.S \ $(MESA_GLAPI_DIR)/glapi_sparc.S \ $(COMMON_GLX) \ + apiexec.py \ gl_apitemp.py \ gl_enums.py \ gl_genexec.py \ @@ -267,7 +268,7 @@ $(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON) $(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON) $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@ -$(MESA_DIR)/main/api_exec.c: gl_genexec.py $(COMMON) +$(MESA_DIR)/main/api_exec.c: gl_genexec.py apiexec.py $(COMMON) $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@ $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON) diff --git a/src/mapi/glapi/gen/apiexec.py b/src/mapi/glapi/gen/apiexec.py new file mode 100644 index 00000000000..4f16a0babee --- /dev/null +++ b/src/mapi/glapi/gen/apiexec.py @@ -0,0 +1,140 @@ +# Copyright (C) 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to 
the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +class exec_info(): + """Information relating GL APIs to a function. + + Each of the four attributes of this class, compatibility, core, es1, and + es2, specify the minimum API version where a function can possibly exist + in Mesa. The version is specified as an integer of (real GL version * + 10). For example, glCreateProgram was added in OpenGL 2.0, so + compatibility=20 and core=31. + + If the attribute is None, then it cannot be supported by that + API. For example, glNewList was removed from core profiles, so + compatibility=10 and core=None. + + Each of the attributes that is not None must have a valid value. The + valid ranges are: + + compatiblity: [10, 30] + core: [31, ) + es1: [10, 11] + es2: [20, ) + + These ranges are enforced by the constructor. 
+ """ + def __init__(self, compatibility=None, core=None, es1=None, es2=None): + if compatibility is not None: + assert isinstance(compatibility, int) + assert compatibility >= 10 + assert compatibility <= 30 + + if core is not None: + assert isinstance(core, int) + assert core >= 31 + + if es1 is not None: + assert isinstance(es1, int) + assert es1 == 10 or es1 == 11 + + if es2 is not None: + assert isinstance(es2, int) + assert es2 >= 20 + + self.compatibility = compatibility + self.core = core + self.es1 = es1 + self.es2 = es2 + +functions = { + # OpenGL 3.1 / GL_ARB_texture_buffer_object. Mesa only exposes this + # extension with core profile. + "TexBuffer": exec_info(core=31), + + # OpenGL 3.2 / GL_ARB_geometry_shader4. Mesa does not support + # GL_ARB_geometry_shader4, so OpenGL 3.2 is required. + "FramebufferTexture": exec_info(core=32), + + # OpenGL 4.0 / GL_ARB_gpu_shader_fp64. The extension spec says: + # + # "OpenGL 3.2 and GLSL 1.50 are required." + "Uniform1d": exec_info(core=32), + "Uniform2d": exec_info(core=32), + "Uniform3d": exec_info(core=32), + "Uniform4d": exec_info(core=32), + "Uniform1dv": exec_info(core=32), + "Uniform2dv": exec_info(core=32), + "Uniform3dv": exec_info(core=32), + "Uniform4dv": exec_info(core=32), + "UniformMatrix2dv": exec_info(core=32), + "UniformMatrix3dv": exec_info(core=32), + "UniformMatrix4dv": exec_info(core=32), + "UniformMatrix2x3dv": exec_info(core=32), + "UniformMatrix2x4dv": exec_info(core=32), + "UniformMatrix3x2dv": exec_info(core=32), + "UniformMatrix3x4dv": exec_info(core=32), + "UniformMatrix4x2dv": exec_info(core=32), + "UniformMatrix4x3dv": exec_info(core=32), + "GetUniformdv": exec_info(core=32), + + # OpenGL 4.1 / GL_ARB_vertex_attrib_64bit. The extension spec says: + # + # "OpenGL 3.0 and GLSL 1.30 are required. + # + # ARB_gpu_shader_fp64 (or equivalent functionality) is required." + # + # For Mesa this effectively means OpenGL 3.2 is required. 
It seems + # unlikely that Mesa will ever get support for any of the NV extensions + # that add "equivalent functionality." + "VertexAttribL1d": exec_info(core=32), + "VertexAttribL2d": exec_info(core=32), + "VertexAttribL3d": exec_info(core=32), + "VertexAttribL4d": exec_info(core=32), + "VertexAttribL1dv": exec_info(core=32), + "VertexAttribL2dv": exec_info(core=32), + "VertexAttribL3dv": exec_info(core=32), + "VertexAttribL4dv": exec_info(core=32), + "VertexAttribLPointer": exec_info(core=32), + "GetVertexAttribLdv": exec_info(core=32), + + # OpenGL 4.1 / GL_ARB_viewport_array. The extension spec says: + # + # "OpenGL 3.2 or the EXT_geometry_shader4 or ARB_geometry_shader4 + # extensions are required." + # + # Mesa does not support either of the geometry shader extensions, so + # OpenGL 3.2 is required. + "ViewportArrayv": exec_info(core=32), + "ViewportIndexedf": exec_info(core=32), + "ViewportIndexedfv": exec_info(core=32), + "ScissorArrayv": exec_info(core=32), + "ScissorIndexed": exec_info(core=32), + "ScissorIndexedv": exec_info(core=32), + "DepthRangeArrayv": exec_info(core=32), + "DepthRangeIndexed": exec_info(core=32), + # GetFloati_v also GL_ARB_shader_atomic_counters + # GetDoublei_v also GL_ARB_shader_atomic_counters + + # OpenGL 4.3 / GL_ARB_texture_buffer_range. Mesa can expose the extension + # with OpenGL 3.1. 
+ "TexBufferRange": exec_info(core=31), +} diff --git a/src/mapi/glapi/gen/gl_genexec.py b/src/mapi/glapi/gen/gl_genexec.py index 0d58a8a2914..26d8e7bfb3a 100644 --- a/src/mapi/glapi/gen/gl_genexec.py +++ b/src/mapi/glapi/gen/gl_genexec.py @@ -30,6 +30,7 @@ import collections import license import gl_XML import sys +import apiexec exec_flavor_map = { @@ -176,18 +177,49 @@ class PrintCode(gl_XML.gl_print_base): raise Exception( 'Unrecognized exec flavor {0!r}'.format(f.exec_flavor)) condition_parts = [] - if f.desktop: - if f.deprecated: + if f.name in apiexec.functions: + ex = apiexec.functions[f.name] + unconditional_count = 0 + + if ex.compatibility is not None: condition_parts.append('ctx->API == API_OPENGL_COMPAT') - else: - condition_parts.append('_mesa_is_desktop_gl(ctx)') - if 'es1' in f.api_map: - condition_parts.append('ctx->API == API_OPENGLES') - if 'es2' in f.api_map: - if f.api_map['es2'] > 2.0: - condition_parts.append('(ctx->API == API_OPENGLES2 && ctx->Version >= {0})'.format(int(f.api_map['es2'] * 10))) - else: - condition_parts.append('ctx->API == API_OPENGLES2') + unconditional_count += 1 + + if ex.core is not None: + condition_parts.append('ctx->API == API_OPENGL_CORE') + unconditional_count += 1 + + if ex.es1 is not None: + condition_parts.append('ctx->API == API_OPENGLES') + unconditional_count += 1 + + if ex.es2 is not None: + if ex.es2 > 20: + condition_parts.append('(ctx->API == API_OPENGLES2 && ctx->Version >= {0})'.format(ex.es2)) + else: + condition_parts.append('ctx->API == API_OPENGLES2') + unconditional_count += 1 + + # If the function is unconditionally available in all four + # APIs, then it is always available. Replace the complex + # tautology condition with "true" and let GCC do the right + # thing. 
+ if unconditional_count == 4: + condition_parts = ['true'] + else: + if f.desktop: + if f.deprecated: + condition_parts.append('ctx->API == API_OPENGL_COMPAT') + else: + condition_parts.append('_mesa_is_desktop_gl(ctx)') + if 'es1' in f.api_map: + condition_parts.append('ctx->API == API_OPENGLES') + if 'es2' in f.api_map: + if f.api_map['es2'] > 2.0: + condition_parts.append('(ctx->API == API_OPENGLES2 && ctx->Version >= {0})'.format(int(f.api_map['es2'] * 10))) + else: + condition_parts.append('ctx->API == API_OPENGLES2') + if not condition_parts: # This function does not exist in any API. continue From 4e5efa9e7ddb6d5273996cf9b09677d918759d17 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 19 May 2015 11:48:11 -0700 Subject: [PATCH 352/834] glapi: Make GL_ARB_direct_state_access functions exclusive to core profile Signed-off-by: Ian Romanick Cc: Dave Airlie Cc: Ilia Mirkin Cc: Dylan Baker Cc: "10.6" --- src/mapi/glapi/gen/apiexec.py | 100 ++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/src/mapi/glapi/gen/apiexec.py b/src/mapi/glapi/gen/apiexec.py index 4f16a0babee..535de8a434b 100644 --- a/src/mapi/glapi/gen/apiexec.py +++ b/src/mapi/glapi/gen/apiexec.py @@ -137,4 +137,104 @@ functions = { # OpenGL 4.3 / GL_ARB_texture_buffer_range. Mesa can expose the extension # with OpenGL 3.1. "TexBufferRange": exec_info(core=31), + + # OpenGL 4.5 / GL_ARB_direct_state_access. Mesa can expose the extension + # with core profile. 
+ "CreateTransformFeedbacks": exec_info(core=31), + "TransformFeedbackBufferBase": exec_info(core=31), + "TransformFeedbackBufferRange": exec_info(core=31), + "GetTransformFeedbackiv": exec_info(core=31), + "GetTransformFeedbacki_v": exec_info(core=31), + "GetTransformFeedbacki64_v": exec_info(core=31), + "CreateBuffers": exec_info(core=31), + "NamedBufferStorage": exec_info(core=31), + "NamedBufferData": exec_info(core=31), + "NamedBufferSubData": exec_info(core=31), + "CopyNamedBufferSubData": exec_info(core=31), + "ClearNamedBufferData": exec_info(core=31), + "ClearNamedBufferSubData": exec_info(core=31), + "MapNamedBuffer": exec_info(core=31), + "MapNamedBufferRange": exec_info(core=31), + "UnmapNamedBuffer": exec_info(core=31), + "FlushMappedNamedBufferRange": exec_info(core=31), + "GetNamedBufferParameteriv": exec_info(core=31), + "GetNamedBufferParameteri64v": exec_info(core=31), + "GetNamedBufferPointerv": exec_info(core=31), + "GetNamedBufferSubData": exec_info(core=31), + "CreateFramebuffers": exec_info(core=31), + "NamedFramebufferRenderbuffer": exec_info(core=31), + "NamedFramebufferParameteri": exec_info(core=31), + "NamedFramebufferTexture": exec_info(core=31), + "NamedFramebufferTextureLayer": exec_info(core=31), + "NamedFramebufferDrawBuffer": exec_info(core=31), + "NamedFramebufferDrawBuffers": exec_info(core=31), + "NamedFramebufferReadBuffer": exec_info(core=31), + "InvalidateNamedFramebufferData": exec_info(core=31), + "InvalidateNamedFramebufferSubData": exec_info(core=31), + "ClearNamedFramebufferiv": exec_info(core=31), + "ClearNamedFramebufferuiv": exec_info(core=31), + "ClearNamedFramebufferfv": exec_info(core=31), + "ClearNamedFramebufferfi": exec_info(core=31), + "BlitNamedFramebuffer": exec_info(core=31), + "CheckNamedFramebufferStatus": exec_info(core=31), + "GetNamedFramebufferParameteriv": exec_info(core=31), + "GetNamedFramebufferAttachmentParameteriv": exec_info(core=31), + "CreateRenderbuffers": exec_info(core=31), + 
"NamedRenderbufferStorage": exec_info(core=31), + "NamedRenderbufferStorageMultisample": exec_info(core=31), + "GetNamedRenderbufferParameteriv": exec_info(core=31), + "CreateTextures": exec_info(core=31), + "TextureBuffer": exec_info(core=31), + "TextureBufferRange": exec_info(core=31), + "TextureStorage1D": exec_info(core=31), + "TextureStorage2D": exec_info(core=31), + "TextureStorage3D": exec_info(core=31), + "TextureStorage2DMultisample": exec_info(core=31), + "TextureStorage3DMultisample": exec_info(core=31), + "TextureSubImage1D": exec_info(core=31), + "TextureSubImage2D": exec_info(core=31), + "TextureSubImage3D": exec_info(core=31), + "CompressedTextureSubImage1D": exec_info(core=31), + "CompressedTextureSubImage2D": exec_info(core=31), + "CompressedTextureSubImage3D": exec_info(core=31), + "CopyTextureSubImage1D": exec_info(core=31), + "CopyTextureSubImage2D": exec_info(core=31), + "CopyTextureSubImage3D": exec_info(core=31), + "TextureParameterf": exec_info(core=31), + "TextureParameterfv": exec_info(core=31), + "TextureParameteri": exec_info(core=31), + "TextureParameterIiv": exec_info(core=31), + "TextureParameterIuiv": exec_info(core=31), + "TextureParameteriv": exec_info(core=31), + "GenerateTextureMipmap": exec_info(core=31), + "BindTextureUnit": exec_info(core=31), + "GetTextureImage": exec_info(core=31), + "GetCompressedTextureImage": exec_info(core=31), + "GetTextureLevelParameterfv": exec_info(core=31), + "GetTextureLevelParameteriv": exec_info(core=31), + "GetTextureParameterfv": exec_info(core=31), + "GetTextureParameterIiv": exec_info(core=31), + "GetTextureParameterIuiv": exec_info(core=31), + "GetTextureParameteriv": exec_info(core=31), + "CreateVertexArrays": exec_info(core=31), + "DisableVertexArrayAttrib": exec_info(core=31), + "EnableVertexArrayAttrib": exec_info(core=31), + "VertexArrayElementBuffer": exec_info(core=31), + "VertexArrayVertexBuffer": exec_info(core=31), + "VertexArrayVertexBuffers": exec_info(core=31), + 
"VertexArrayAttribFormat": exec_info(core=31), + "VertexArrayAttribIFormat": exec_info(core=31), + "VertexArrayAttribLFormat": exec_info(core=31), + "VertexArrayAttribBinding": exec_info(core=31), + "VertexArrayBindingDivisor": exec_info(core=31), + "GetVertexArrayiv": exec_info(core=31), + "GetVertexArrayIndexediv": exec_info(core=31), + "GetVertexArrayIndexed64iv": exec_info(core=31), + "CreateSamplers": exec_info(core=31), + "CreateProgramPipelines": exec_info(core=31), + "CreateQueries": exec_info(core=31), + "GetQueryBufferObjectiv": exec_info(core=31), + "GetQueryBufferObjectuiv": exec_info(core=31), + "GetQueryBufferObjecti64v": exec_info(core=31), + "GetQueryBufferObjectui64v": exec_info(core=31), } From a6fa74e6bb65f852ad1608f43dd0731e854ea42f Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 20 May 2015 20:13:12 -0700 Subject: [PATCH 353/834] mesa: Don't install glVertexAttribL* functions in compatibility profile GL_ARB_vertex_attrib_64bit is exclusive to core profile, and none of the other functions added by the extension are advertised in other profiles. 
Signed-off-by: Ian Romanick Cc: Dave Airlie Cc: Ilia Mirkin Cc: "10.6" --- src/mesa/main/api_loopback.c | 2 ++ src/mesa/main/vtxfmt.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/api_loopback.c b/src/mesa/main/api_loopback.c index 9932a837336..a7fd82c531f 100644 --- a/src/mesa/main/api_loopback.c +++ b/src/mesa/main/api_loopback.c @@ -1772,7 +1772,9 @@ _mesa_loopback_init_api_table(const struct gl_context *ctx, SET_VertexAttribI4sv(dest, _mesa_VertexAttribI4sv); SET_VertexAttribI4ubv(dest, _mesa_VertexAttribI4ubv); SET_VertexAttribI4usv(dest, _mesa_VertexAttribI4usv); + } + if (ctx->API == API_OPENGL_CORE) { /* GL 4.1 / GL_ARB_vertex_attrib_64bit */ SET_VertexAttribL1d(dest, _mesa_VertexAttribL1d); SET_VertexAttribL2d(dest, _mesa_VertexAttribL2d); diff --git a/src/mesa/main/vtxfmt.c b/src/mesa/main/vtxfmt.c index d7ef7e278cd..81bf4c589ea 100644 --- a/src/mesa/main/vtxfmt.c +++ b/src/mesa/main/vtxfmt.c @@ -207,7 +207,7 @@ install_vtxfmt(struct gl_context *ctx, struct _glapi_table *tab, SET_VertexAttribP4uiv(tab, vfmt->VertexAttribP4uiv); } - if (_mesa_is_desktop_gl(ctx)) { + if (ctx->API == API_OPENGL_CORE) { SET_VertexAttribL1d(tab, vfmt->VertexAttribL1d); SET_VertexAttribL2d(tab, vfmt->VertexAttribL2d); SET_VertexAttribL3d(tab, vfmt->VertexAttribL3d); From 49ab670f52947dda04585cc5156e55b89f0c1c4a Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 20 May 2015 20:17:19 -0700 Subject: [PATCH 354/834] dispatch_sanity: Split list of GL 3.1 functions into core and common The next patch will add a test for compatibility profile dispatch, and it seems to make more sense to share the lists. 
Signed-off-by: Ian Romanick Cc: Ilia Mirkin Cc: "10.6" --- src/mesa/main/tests/dispatch_sanity.cpp | 413 ++++++++++++++++++++---- 1 file changed, 342 insertions(+), 71 deletions(-) diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index ab66f884673..3d9539b474d 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -68,6 +68,7 @@ struct function { int offset; }; +extern const struct function common_desktop_functions_possible[]; extern const struct function gl_core_functions_possible[]; extern const struct function gles11_functions_possible[]; extern const struct function gles2_functions_possible[]; @@ -176,6 +177,7 @@ validate_nops(struct gl_context *ctx, const _glapi_proc *nop_table) TEST_F(DispatchSanity_test, GL31_CORE) { SetUpCtx(API_OPENGL_CORE, 31); + validate_functions(&ctx, common_desktop_functions_possible, nop_table); validate_functions(&ctx, gl_core_functions_possible, nop_table); validate_nops(&ctx, nop_table); } @@ -211,7 +213,7 @@ TEST_F(DispatchSanity_test, GLES31) validate_nops(&ctx, nop_table); } -const struct function gl_core_functions_possible[] = { +const struct function common_desktop_functions_possible[] = { { "glCullFace", 10, -1 }, { "glFrontFace", 10, -1 }, { "glHint", 10, -1 }, @@ -223,8 +225,8 @@ const struct function gl_core_functions_possible[] = { { "glTexParameterfv", 10, -1 }, { "glTexParameteri", 10, -1 }, { "glTexParameteriv", 10, -1 }, - { "glTexImage1D", 10, -1 }, - { "glTexImage2D", 10, -1 }, + { "glTexImage1D", 10, _gloffset_TexImage1D }, + { "glTexImage2D", 10, _gloffset_TexImage2D }, { "glDrawBuffer", 10, -1 }, { "glClear", 10, -1 }, { "glClearColor", 10, -1 }, @@ -492,7 +494,6 @@ const struct function gl_core_functions_possible[] = { /* GL 3.1 */ { "glDrawArraysInstanced", 31, -1 }, { "glDrawElementsInstanced", 31, -1 }, - { "glTexBuffer", 31, -1 }, { "glPrimitiveRestartIndex", 31, -1 }, /* GL_ARB_shader_objects */ @@ -546,7 +547,6 @@ 
const struct function gl_core_functions_possible[] = { { "glGetBufferParameteri64v", 32, -1 }, { "glFramebufferTexture", 32, -1 }, { "glProgramParameteri", 32, -1 }, - { "glFramebufferTexture", 32, -1 }, { "glFramebufferTextureLayer", 32, -1 }, /* GL 3.3 */ @@ -680,34 +680,6 @@ const struct function gl_core_functions_possible[] = { { "glVertexAttribP4uiv", 43, -1 }, { "glDrawArraysIndirect", 43, -1 }, { "glDrawElementsIndirect", 43, -1 }, - { "glUniform1d", 40, -1 }, - { "glUniform2d", 40, -1 }, - { "glUniform3d", 40, -1 }, - { "glUniform4d", 40, -1 }, - { "glUniform1dv", 40, -1 }, - { "glUniform2dv", 40, -1 }, - { "glUniform3dv", 40, -1 }, - { "glUniform4dv", 40, -1 }, - { "glUniformMatrix2dv", 40, -1 }, - { "glUniformMatrix3dv", 40, -1 }, - { "glUniformMatrix4dv", 40, -1 }, - { "glUniformMatrix2x3dv", 40, -1 }, - { "glUniformMatrix2x4dv", 40, -1 }, - { "glUniformMatrix3x2dv", 40, -1 }, - { "glUniformMatrix3x4dv", 40, -1 }, - { "glUniformMatrix4x2dv", 40, -1 }, - { "glUniformMatrix4x3dv", 40, -1 }, - { "glGetUniformdv", 43, -1 }, -// { "glGetSubroutineUniformLocation", 43, -1 }, // XXX: Add to xml -// { "glGetSubroutineIndex", 43, -1 }, // XXX: Add to xml -// { "glGetActiveSubroutineUniformiv", 43, -1 }, // XXX: Add to xml -// { "glGetActiveSubroutineUniformName", 43, -1 }, // XXX: Add to xml -// { "glGetActiveSubroutineName", 43, -1 }, // XXX: Add to xml -// { "glUniformSubroutinesuiv", 43, -1 }, // XXX: Add to xml -// { "glGetUniformSubroutineuiv", 43, -1 }, // XXX: Add to xml -// { "glGetProgramStageiv", 43, -1 }, // XXX: Add to xml -// { "glPatchParameteri", 43, -1 }, // XXX: Add to xml -// { "glPatchParameterfv", 43, -1 }, // XXX: Add to xml { "glBindTransformFeedback", 43, -1 }, { "glDeleteTransformFeedbacks", 43, -1 }, { "glGenTransformFeedbacks", 43, -1 }, @@ -735,12 +707,12 @@ const struct function gl_core_functions_possible[] = { { "glGenProgramPipelines", 43, -1 }, { "glIsProgramPipeline", 43, -1 }, { "glGetProgramPipelineiv", 43, -1 }, + { 
"glProgramUniform1d", 43, -1 }, + { "glProgramUniform1dv", 43, -1 }, { "glProgramUniform1i", 43, -1 }, { "glProgramUniform1iv", 43, -1 }, { "glProgramUniform1f", 43, -1 }, { "glProgramUniform1fv", 43, -1 }, - { "glProgramUniform1d", 40, -1 }, - { "glProgramUniform1dv", 40, -1 }, { "glProgramUniform1ui", 43, -1 }, { "glProgramUniform1uiv", 43, -1 }, { "glProgramUniform2i", 43, -1 }, @@ -761,50 +733,32 @@ const struct function gl_core_functions_possible[] = { { "glProgramUniform3uiv", 43, -1 }, { "glProgramUniform4i", 43, -1 }, { "glProgramUniform4iv", 43, -1 }, + { "glProgramUniform4d", 43, -1 }, + { "glProgramUniform4dv", 43, -1 }, { "glProgramUniform4f", 43, -1 }, { "glProgramUniform4fv", 43, -1 }, - { "glProgramUniform4d", 40, -1 }, - { "glProgramUniform4dv", 40, -1 }, { "glProgramUniform4ui", 43, -1 }, { "glProgramUniform4uiv", 43, -1 }, + { "glProgramUniformMatrix2dv", 43, -1 }, { "glProgramUniformMatrix2fv", 43, -1 }, + { "glProgramUniformMatrix3dv", 43, -1 }, { "glProgramUniformMatrix3fv", 43, -1 }, + { "glProgramUniformMatrix4dv", 43, -1 }, { "glProgramUniformMatrix4fv", 43, -1 }, - { "glProgramUniformMatrix2dv", 40, -1 }, - { "glProgramUniformMatrix3dv", 40, -1 }, - { "glProgramUniformMatrix4dv", 40, -1 }, + { "glProgramUniformMatrix2x3dv", 43, -1 }, { "glProgramUniformMatrix2x3fv", 43, -1 }, + { "glProgramUniformMatrix3x2dv", 43, -1 }, { "glProgramUniformMatrix3x2fv", 43, -1 }, + { "glProgramUniformMatrix2x4dv", 43, -1 }, { "glProgramUniformMatrix2x4fv", 43, -1 }, + { "glProgramUniformMatrix4x2dv", 43, -1 }, { "glProgramUniformMatrix4x2fv", 43, -1 }, + { "glProgramUniformMatrix3x4dv", 43, -1 }, { "glProgramUniformMatrix3x4fv", 43, -1 }, + { "glProgramUniformMatrix4x3dv", 43, -1 }, { "glProgramUniformMatrix4x3fv", 43, -1 }, - { "glProgramUniformMatrix2x3dv", 40, -1 }, - { "glProgramUniformMatrix3x2dv", 40, -1 }, - { "glProgramUniformMatrix2x4dv", 40, -1 }, - { "glProgramUniformMatrix4x2dv", 40, -1 }, - { "glProgramUniformMatrix3x4dv", 40, -1 }, - { 
"glProgramUniformMatrix4x3dv", 40, -1 }, { "glValidateProgramPipeline", 43, -1 }, { "glGetProgramPipelineInfoLog", 43, -1 }, - { "glVertexAttribL1d", 41, -1 }, - { "glVertexAttribL2d", 41, -1 }, - { "glVertexAttribL3d", 41, -1 }, - { "glVertexAttribL4d", 41, -1 }, - { "glVertexAttribL1dv", 41, -1 }, - { "glVertexAttribL2dv", 41, -1 }, - { "glVertexAttribL3dv", 41, -1 }, - { "glVertexAttribL4dv", 41, -1 }, - { "glVertexAttribLPointer", 41, -1 }, - { "glGetVertexAttribLdv", 41, -1 }, - { "glViewportArrayv", 43, -1 }, - { "glViewportIndexedf", 43, -1 }, - { "glViewportIndexedfv", 43, -1 }, - { "glScissorArrayv", 43, -1 }, - { "glScissorIndexed", 43, -1 }, - { "glScissorIndexedv", 43, -1 }, - { "glDepthRangeArrayv", 43, -1 }, - { "glDepthRangeIndexed", 43, -1 }, { "glGetFloati_v", 43, -1 }, { "glGetDoublei_v", 43, -1 }, // { "glCreateSyncFromCLeventARB", 43, -1 }, // XXX: Add to xml @@ -847,8 +801,6 @@ const struct function gl_core_functions_possible[] = { { "glClearBufferSubData", 43, -1 }, // { "glClearNamedBufferDataEXT", 43, -1 }, // XXX: Add to xml // { "glClearNamedBufferSubDataEXT", 43, -1 }, // XXX: Add to xml - { "glDispatchCompute", 43, -1 }, - { "glDispatchComputeIndirect", 43, -1 }, { "glCopyImageSubData", 43, -1 }, { "glTextureView", 43, -1 }, { "glBindVertexBuffer", 43, -1 }, @@ -860,7 +812,6 @@ const struct function gl_core_functions_possible[] = { // { "glVertexArrayBindVertexBufferEXT", 43, -1 }, // XXX: Add to xml // { "glVertexArrayVertexAttribFormatEXT", 43, -1 }, // XXX: Add to xml // { "glVertexArrayVertexAttribIFormatEXT", 43, -1 }, // XXX: Add to xml -// { "glVertexArrayVertexAttribLFormatEXT", 43, -1 }, // XXX: Add to xml // { "glVertexArrayVertexAttribBindingEXT", 43, -1 }, // XXX: Add to xml // { "glVertexArrayVertexBindingDivisorEXT", 43, -1 }, // XXX: Add to xml // { "glFramebufferParameteri", 43, -1 }, // XXX: Add to xml @@ -883,7 +834,6 @@ const struct function gl_core_functions_possible[] = { { "glGetProgramResourceLocation", 43, -1 }, { 
"glGetProgramResourceLocationIndex", 43, -1 }, // { "glShaderStorageBlockBinding", 43, -1 }, // XXX: Add to xml - { "glTexBufferRange", 43, -1 }, // { "glTextureBufferRangeEXT", 43, -1 }, // XXX: Add to xml { "glTexStorage2DMultisample", 43, -1 }, { "glTexStorage3DMultisample", 43, -1 }, @@ -965,6 +915,330 @@ const struct function gl_core_functions_possible[] = { /* GL_ARB_clip_control */ { "glClipControl", 45, -1 }, + /* GL_ARB_compute_shader */ + { "glDispatchCompute", 43, -1 }, + { "glDispatchComputeIndirect", 43, -1 }, + + /* GL_EXT_polygon_offset_clamp */ + { "glPolygonOffsetClampEXT", 11, -1 }, + { NULL, 0, -1 } +}; + +const struct function gl_core_functions_possible[] = { + /* GL 3.1 */ + { "glTexBuffer", 31, -1 }, + + /* GL 3.2 */ + { "glFramebufferTexture", 32, -1 }, + + /* GL 4.3 */ + { "glIsRenderbuffer", 43, -1 }, + { "glBindRenderbuffer", 43, -1 }, + { "glDeleteRenderbuffers", 43, -1 }, + { "glGenRenderbuffers", 43, -1 }, + { "glRenderbufferStorage", 43, -1 }, + { "glGetRenderbufferParameteriv", 43, -1 }, + { "glIsFramebuffer", 43, -1 }, + { "glBindFramebuffer", 43, -1 }, + { "glDeleteFramebuffers", 43, -1 }, + { "glGenFramebuffers", 43, -1 }, + { "glCheckFramebufferStatus", 43, -1 }, + { "glFramebufferTexture1D", 43, -1 }, + { "glFramebufferTexture2D", 43, -1 }, + { "glFramebufferTexture3D", 43, -1 }, + { "glFramebufferRenderbuffer", 43, -1 }, + { "glGetFramebufferAttachmentParameteriv", 43, -1 }, + { "glGenerateMipmap", 43, -1 }, + { "glBlitFramebuffer", 43, -1 }, + { "glRenderbufferStorageMultisample", 43, -1 }, + { "glFramebufferTextureLayer", 43, -1 }, + { "glMapBufferRange", 43, -1 }, + { "glFlushMappedBufferRange", 43, -1 }, + { "glBindVertexArray", 43, -1 }, + { "glDeleteVertexArrays", 43, -1 }, + { "glGenVertexArrays", 43, -1 }, + { "glIsVertexArray", 43, -1 }, + { "glGetUniformIndices", 43, -1 }, + { "glGetActiveUniformsiv", 43, -1 }, + { "glGetActiveUniformName", 43, -1 }, + { "glGetUniformBlockIndex", 43, -1 }, + { 
"glGetActiveUniformBlockiv", 43, -1 }, + { "glGetActiveUniformBlockName", 43, -1 }, + { "glUniformBlockBinding", 43, -1 }, + { "glCopyBufferSubData", 43, -1 }, + { "glDrawElementsBaseVertex", 43, -1 }, + { "glDrawRangeElementsBaseVertex", 43, -1 }, + { "glDrawElementsInstancedBaseVertex", 43, -1 }, + { "glMultiDrawElementsBaseVertex", 43, -1 }, + { "glProvokingVertex", 43, -1 }, + { "glFenceSync", 43, -1 }, + { "glIsSync", 43, -1 }, + { "glDeleteSync", 43, -1 }, + { "glClientWaitSync", 43, -1 }, + { "glWaitSync", 43, -1 }, + { "glGetInteger64v", 43, -1 }, + { "glGetSynciv", 43, -1 }, + { "glTexImage2DMultisample", 43, -1 }, + { "glTexImage3DMultisample", 43, -1 }, + { "glGetMultisamplefv", 43, -1 }, + { "glSampleMaski", 43, -1 }, + { "glBlendEquationiARB", 43, -1 }, + { "glBlendEquationSeparateiARB", 43, -1 }, + { "glBlendFunciARB", 43, -1 }, + { "glBlendFuncSeparateiARB", 43, -1 }, + { "glMinSampleShadingARB", 43, -1 }, // XXX: Add to xml +// { "glNamedStringARB", 43, -1 }, // XXX: Add to xml +// { "glDeleteNamedStringARB", 43, -1 }, // XXX: Add to xml +// { "glCompileShaderIncludeARB", 43, -1 }, // XXX: Add to xml +// { "glIsNamedStringARB", 43, -1 }, // XXX: Add to xml +// { "glGetNamedStringARB", 43, -1 }, // XXX: Add to xml +// { "glGetNamedStringivARB", 43, -1 }, // XXX: Add to xml + { "glBindFragDataLocationIndexed", 43, -1 }, + { "glGetFragDataIndex", 43, -1 }, + { "glGenSamplers", 43, -1 }, + { "glDeleteSamplers", 43, -1 }, + { "glIsSampler", 43, -1 }, + { "glBindSampler", 43, -1 }, + { "glSamplerParameteri", 43, -1 }, + { "glSamplerParameteriv", 43, -1 }, + { "glSamplerParameterf", 43, -1 }, + { "glSamplerParameterfv", 43, -1 }, + { "glSamplerParameterIiv", 43, -1 }, + { "glSamplerParameterIuiv", 43, -1 }, + { "glGetSamplerParameteriv", 43, -1 }, + { "glGetSamplerParameterIiv", 43, -1 }, + { "glGetSamplerParameterfv", 43, -1 }, + { "glGetSamplerParameterIuiv", 43, -1 }, + { "glQueryCounter", 43, -1 }, + { "glGetQueryObjecti64v", 43, -1 }, + { 
"glGetQueryObjectui64v", 43, -1 }, + { "glVertexP2ui", 43, -1 }, + { "glVertexP2uiv", 43, -1 }, + { "glVertexP3ui", 43, -1 }, + { "glVertexP3uiv", 43, -1 }, + { "glVertexP4ui", 43, -1 }, + { "glVertexP4uiv", 43, -1 }, + { "glTexCoordP1ui", 43, -1 }, + { "glTexCoordP1uiv", 43, -1 }, + { "glTexCoordP2ui", 43, -1 }, + { "glTexCoordP2uiv", 43, -1 }, + { "glTexCoordP3ui", 43, -1 }, + { "glTexCoordP3uiv", 43, -1 }, + { "glTexCoordP4ui", 43, -1 }, + { "glTexCoordP4uiv", 43, -1 }, + { "glMultiTexCoordP1ui", 43, -1 }, + { "glMultiTexCoordP1uiv", 43, -1 }, + { "glMultiTexCoordP2ui", 43, -1 }, + { "glMultiTexCoordP2uiv", 43, -1 }, + { "glMultiTexCoordP3ui", 43, -1 }, + { "glMultiTexCoordP3uiv", 43, -1 }, + { "glMultiTexCoordP4ui", 43, -1 }, + { "glMultiTexCoordP4uiv", 43, -1 }, + { "glNormalP3ui", 43, -1 }, + { "glNormalP3uiv", 43, -1 }, + { "glColorP3ui", 43, -1 }, + { "glColorP3uiv", 43, -1 }, + { "glColorP4ui", 43, -1 }, + { "glColorP4uiv", 43, -1 }, + { "glVertexAttribP1ui", 43, -1 }, + { "glVertexAttribP1uiv", 43, -1 }, + { "glVertexAttribP2ui", 43, -1 }, + { "glVertexAttribP2uiv", 43, -1 }, + { "glVertexAttribP3ui", 43, -1 }, + { "glVertexAttribP3uiv", 43, -1 }, + { "glVertexAttribP4ui", 43, -1 }, + { "glVertexAttribP4uiv", 43, -1 }, + { "glDrawArraysIndirect", 43, -1 }, + { "glDrawElementsIndirect", 43, -1 }, + + { "glUniform1d", 40, -1 }, + { "glUniform2d", 40, -1 }, + { "glUniform3d", 40, -1 }, + { "glUniform4d", 40, -1 }, + { "glUniform1dv", 40, -1 }, + { "glUniform2dv", 40, -1 }, + { "glUniform3dv", 40, -1 }, + { "glUniform4dv", 40, -1 }, + { "glUniformMatrix2dv", 40, -1 }, + { "glUniformMatrix3dv", 40, -1 }, + { "glUniformMatrix4dv", 40, -1 }, + { "glUniformMatrix2x3dv", 40, -1 }, + { "glUniformMatrix2x4dv", 40, -1 }, + { "glUniformMatrix3x2dv", 40, -1 }, + { "glUniformMatrix3x4dv", 40, -1 }, + { "glUniformMatrix4x2dv", 40, -1 }, + { "glUniformMatrix4x3dv", 40, -1 }, + { "glGetUniformdv", 43, -1 }, +// { "glGetSubroutineUniformLocation", 43, -1 }, // XXX: Add to 
xml +// { "glGetSubroutineIndex", 43, -1 }, // XXX: Add to xml +// { "glGetActiveSubroutineUniformiv", 43, -1 }, // XXX: Add to xml +// { "glGetActiveSubroutineUniformName", 43, -1 }, // XXX: Add to xml +// { "glGetActiveSubroutineName", 43, -1 }, // XXX: Add to xml +// { "glUniformSubroutinesuiv", 43, -1 }, // XXX: Add to xml +// { "glGetUniformSubroutineuiv", 43, -1 }, // XXX: Add to xml +// { "glGetProgramStageiv", 43, -1 }, // XXX: Add to xml +// { "glPatchParameteri", 43, -1 }, // XXX: Add to xml +// { "glPatchParameterfv", 43, -1 }, // XXX: Add to xml + + { "glBindTransformFeedback", 43, -1 }, + { "glDeleteTransformFeedbacks", 43, -1 }, + { "glGenTransformFeedbacks", 43, -1 }, + { "glIsTransformFeedback", 43, -1 }, + { "glPauseTransformFeedback", 43, -1 }, + { "glResumeTransformFeedback", 43, -1 }, + { "glDrawTransformFeedback", 43, -1 }, + { "glDrawTransformFeedbackStream", 43, -1 }, + { "glBeginQueryIndexed", 43, -1 }, + { "glEndQueryIndexed", 43, -1 }, + { "glGetQueryIndexediv", 43, -1 }, + { "glReleaseShaderCompiler", 43, -1 }, + { "glShaderBinary", 43, -1 }, + { "glGetShaderPrecisionFormat", 43, -1 }, + { "glDepthRangef", 43, -1 }, + { "glClearDepthf", 43, -1 }, + { "glGetProgramBinary", 43, -1 }, + { "glProgramBinary", 43, -1 }, + { "glProgramParameteri", 43, -1 }, + { "glUseProgramStages", 43, -1 }, + { "glActiveShaderProgram", 43, -1 }, + { "glCreateShaderProgramv", 43, -1 }, + { "glBindProgramPipeline", 43, -1 }, + { "glDeleteProgramPipelines", 43, -1 }, + { "glGenProgramPipelines", 43, -1 }, + { "glIsProgramPipeline", 43, -1 }, + { "glGetProgramPipelineiv", 43, -1 }, + { "glProgramUniform1i", 43, -1 }, + { "glProgramUniform1iv", 43, -1 }, + { "glProgramUniform1f", 43, -1 }, + { "glProgramUniform1fv", 43, -1 }, + { "glProgramUniform1d", 40, -1 }, + { "glProgramUniform1dv", 40, -1 }, + { "glProgramUniform1ui", 43, -1 }, + { "glProgramUniform1uiv", 43, -1 }, + { "glProgramUniform2i", 43, -1 }, + { "glProgramUniform2iv", 43, -1 }, + { 
"glProgramUniform2f", 43, -1 }, + { "glProgramUniform2fv", 43, -1 }, + { "glProgramUniform2d", 40, -1 }, + { "glProgramUniform2dv", 40, -1 }, + { "glProgramUniform2ui", 43, -1 }, + { "glProgramUniform2uiv", 43, -1 }, + { "glProgramUniform3i", 43, -1 }, + { "glProgramUniform3iv", 43, -1 }, + { "glProgramUniform3f", 43, -1 }, + { "glProgramUniform3fv", 43, -1 }, + { "glProgramUniform3d", 40, -1 }, + { "glProgramUniform3dv", 40, -1 }, + { "glProgramUniform3ui", 43, -1 }, + { "glProgramUniform3uiv", 43, -1 }, + { "glProgramUniform4i", 43, -1 }, + { "glProgramUniform4iv", 43, -1 }, + { "glProgramUniform4f", 43, -1 }, + { "glProgramUniform4fv", 43, -1 }, + { "glProgramUniform4d", 40, -1 }, + { "glProgramUniform4dv", 40, -1 }, + { "glProgramUniform4ui", 43, -1 }, + { "glProgramUniform4uiv", 43, -1 }, + { "glProgramUniformMatrix2fv", 43, -1 }, + { "glProgramUniformMatrix3fv", 43, -1 }, + { "glProgramUniformMatrix4fv", 43, -1 }, + { "glProgramUniformMatrix2dv", 40, -1 }, + { "glProgramUniformMatrix3dv", 40, -1 }, + { "glProgramUniformMatrix4dv", 40, -1 }, + { "glProgramUniformMatrix2x3fv", 43, -1 }, + { "glProgramUniformMatrix3x2fv", 43, -1 }, + { "glProgramUniformMatrix2x4fv", 43, -1 }, + { "glProgramUniformMatrix4x2fv", 43, -1 }, + { "glProgramUniformMatrix3x4fv", 43, -1 }, + { "glProgramUniformMatrix4x3fv", 43, -1 }, + { "glProgramUniformMatrix2x3dv", 40, -1 }, + { "glProgramUniformMatrix3x2dv", 40, -1 }, + { "glProgramUniformMatrix2x4dv", 40, -1 }, + { "glProgramUniformMatrix4x2dv", 40, -1 }, + { "glProgramUniformMatrix3x4dv", 40, -1 }, + { "glProgramUniformMatrix4x3dv", 40, -1 }, + { "glValidateProgramPipeline", 43, -1 }, + { "glGetProgramPipelineInfoLog", 43, -1 }, + + { "glVertexAttribL1d", 41, -1 }, + { "glVertexAttribL2d", 41, -1 }, + { "glVertexAttribL3d", 41, -1 }, + { "glVertexAttribL4d", 41, -1 }, + { "glVertexAttribL1dv", 41, -1 }, + { "glVertexAttribL2dv", 41, -1 }, + { "glVertexAttribL3dv", 41, -1 }, + { "glVertexAttribL4dv", 41, -1 }, + { 
"glVertexAttribLPointer", 41, -1 }, + { "glGetVertexAttribLdv", 41, -1 }, + { "glViewportArrayv", 43, -1 }, + { "glViewportIndexedf", 43, -1 }, + { "glViewportIndexedfv", 43, -1 }, + { "glScissorArrayv", 43, -1 }, + { "glScissorIndexed", 43, -1 }, + { "glScissorIndexedv", 43, -1 }, + { "glDepthRangeArrayv", 43, -1 }, + { "glDepthRangeIndexed", 43, -1 }, + +// { "glCreateSyncFromCLeventARB", 43, -1 }, // XXX: Add to xml + + { "glDrawArraysInstancedBaseInstance", 43, -1 }, + { "glDrawElementsInstancedBaseInstance", 43, -1 }, + { "glDrawElementsInstancedBaseVertexBaseInstance", 43, -1 }, + { "glDrawTransformFeedbackInstanced", 43, -1 }, + { "glDrawTransformFeedbackStreamInstanced", 43, -1 }, + { "glGetActiveAtomicCounterBufferiv", 43, -1 }, + { "glBindImageTexture", 43, -1 }, + { "glMemoryBarrier", 43, -1 }, + { "glTexStorage1D", 43, -1 }, + { "glTexStorage2D", 43, -1 }, + { "glTexStorage3D", 43, -1 }, + { "glTextureStorage1DEXT", 43, -1 }, + { "glTextureStorage2DEXT", 43, -1 }, + { "glTextureStorage3DEXT", 43, -1 }, + { "glClearBufferData", 43, -1 }, + { "glClearBufferSubData", 43, -1 }, +// { "glClearNamedBufferDataEXT", 43, -1 }, // XXX: Add to xml +// { "glClearNamedBufferSubDataEXT", 43, -1 }, // XXX: Add to xml + { "glCopyImageSubData", 43, -1 }, + { "glTextureView", 43, -1 }, + { "glBindVertexBuffer", 43, -1 }, + { "glVertexAttribFormat", 43, -1 }, + { "glVertexAttribIFormat", 43, -1 }, + { "glVertexAttribBinding", 43, -1 }, + { "glVertexBindingDivisor", 43, -1 }, +// { "glVertexArrayBindVertexBufferEXT", 43, -1 }, // XXX: Add to xml +// { "glVertexArrayVertexAttribFormatEXT", 43, -1 }, // XXX: Add to xml +// { "glVertexArrayVertexAttribIFormatEXT", 43, -1 }, // XXX: Add to xml +// { "glVertexArrayVertexAttribLFormatEXT", 43, -1 }, // XXX: Add to xml +// { "glVertexArrayVertexAttribBindingEXT", 43, -1 }, // XXX: Add to xml +// { "glVertexArrayVertexBindingDivisorEXT", 43, -1 }, // XXX: Add to xml +// { "glFramebufferParameteri", 43, -1 }, // XXX: Add to xml +// 
{ "glGetFramebufferParameteriv", 43, -1 }, // XXX: Add to xml +// { "glNamedFramebufferParameteriEXT", 43, -1 }, // XXX: Add to xml +// { "glGetNamedFramebufferParameterivEXT", 43, -1 }, // XXX: Add to xml +// { "glGetInternalformati64v", 43, -1 }, // XXX: Add to xml + { "glInvalidateTexSubImage", 43, -1 }, + { "glInvalidateTexImage", 43, -1 }, + { "glInvalidateBufferSubData", 43, -1 }, + { "glInvalidateBufferData", 43, -1 }, + { "glInvalidateFramebuffer", 43, -1 }, + { "glInvalidateSubFramebuffer", 43, -1 }, + { "glMultiDrawArraysIndirect", 43, -1 }, + { "glMultiDrawElementsIndirect", 43, -1 }, + { "glGetProgramInterfaceiv", 43, -1 }, + { "glGetProgramResourceIndex", 43, -1 }, + { "glGetProgramResourceName", 43, -1 }, + { "glGetProgramResourceiv", 43, -1 }, + { "glGetProgramResourceLocation", 43, -1 }, + { "glGetProgramResourceLocationIndex", 43, -1 }, +// { "glShaderStorageBlockBinding", 43, -1 }, // XXX: Add to xml + { "glTexBufferRange", 43, -1 }, +// { "glTextureBufferRangeEXT", 43, -1 }, // XXX: Add to xml + { "glTexStorage2DMultisample", 43, -1 }, + { "glTexStorage3DMultisample", 43, -1 }, +// { "glTextureStorage2DMultisampleEXT", 43, -1 }, // XXX: Add to xml +// { "glTextureStorage3DMultisampleEXT", 43, -1 }, // XXX: Add to xml + /* GL_ARB_direct_state_access */ { "glCreateTransformFeedbacks", 45, -1 }, { "glTransformFeedbackBufferBase", 45, -1 }, @@ -1064,9 +1338,6 @@ const struct function gl_core_functions_possible[] = { { "glGetQueryBufferObjecti64v", 45, -1 }, { "glGetQueryBufferObjectui64v", 45, -1 }, - /* GL_EXT_polygon_offset_clamp */ - { "glPolygonOffsetClampEXT", 11, -1 }, - { NULL, 0, -1 } }; From ef4dd0fc3e6b5ffbad6bd286ef9c6c25d0b25bae Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 20 May 2015 20:19:07 -0700 Subject: [PATCH 355/834] dispatch_sanity: Validate the compatibility profile dispatch table too Signed-off-by: Ian Romanick Suggested-by: Ilia Mirkin Cc: Ilia Mirkin Cc: "10.6" --- src/mesa/main/tests/dispatch_sanity.cpp | 493 
++++++++++++++++++++++++ 1 file changed, 493 insertions(+) diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 3d9539b474d..0b7262a21e7 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -69,6 +69,7 @@ struct function { }; extern const struct function common_desktop_functions_possible[]; +extern const struct function gl_compatibility_functions_possible[]; extern const struct function gl_core_functions_possible[]; extern const struct function gles11_functions_possible[]; extern const struct function gles2_functions_possible[]; @@ -182,6 +183,14 @@ TEST_F(DispatchSanity_test, GL31_CORE) validate_nops(&ctx, nop_table); } +TEST_F(DispatchSanity_test, GL30) +{ + SetUpCtx(API_OPENGL_COMPAT, 30); + validate_functions(&ctx, common_desktop_functions_possible, nop_table); + validate_functions(&ctx, gl_compatibility_functions_possible, nop_table); + validate_nops(&ctx, nop_table); +} + TEST_F(DispatchSanity_test, GLES11) { SetUpCtx(API_OPENGLES, 11); @@ -924,6 +933,490 @@ const struct function common_desktop_functions_possible[] = { { NULL, 0, -1 } }; +const struct function gl_compatibility_functions_possible[] = { + { "glBindVertexArrayAPPLE", 10, -1 }, + { "glGenVertexArraysAPPLE", 10, -1 }, + { "glBindRenderbufferEXT", 10, -1 }, + { "glBindFramebufferEXT", 10, -1 }, + { "glNewList", 10, _gloffset_NewList }, + { "glEndList", 10, _gloffset_EndList }, + { "glCallList", 10, _gloffset_CallList }, + { "glCallLists", 10, _gloffset_CallLists }, + { "glDeleteLists", 10, _gloffset_DeleteLists }, + { "glGenLists", 10, _gloffset_GenLists }, + { "glListBase", 10, _gloffset_ListBase }, + { "glBegin", 10, _gloffset_Begin }, + { "glBitmap", 10, _gloffset_Bitmap }, + { "glColor3b", 10, _gloffset_Color3b }, + { "glColor3bv", 10, _gloffset_Color3bv }, + { "glColor3d", 10, _gloffset_Color3d }, + { "glColor3dv", 10, _gloffset_Color3dv }, + { "glColor3f", 10, _gloffset_Color3f }, + { 
"glColor3fv", 10, _gloffset_Color3fv }, + { "glColor3i", 10, _gloffset_Color3i }, + { "glColor3iv", 10, _gloffset_Color3iv }, + { "glColor3s", 10, _gloffset_Color3s }, + { "glColor3sv", 10, _gloffset_Color3sv }, + { "glColor3ub", 10, _gloffset_Color3ub }, + { "glColor3ubv", 10, _gloffset_Color3ubv }, + { "glColor3ui", 10, _gloffset_Color3ui }, + { "glColor3uiv", 10, _gloffset_Color3uiv }, + { "glColor3us", 10, _gloffset_Color3us }, + { "glColor3usv", 10, _gloffset_Color3usv }, + { "glColor4b", 10, _gloffset_Color4b }, + { "glColor4bv", 10, _gloffset_Color4bv }, + { "glColor4d", 10, _gloffset_Color4d }, + { "glColor4dv", 10, _gloffset_Color4dv }, + { "glColor4f", 10, _gloffset_Color4f }, + { "glColor4fv", 10, _gloffset_Color4fv }, + { "glColor4i", 10, _gloffset_Color4i }, + { "glColor4iv", 10, _gloffset_Color4iv }, + { "glColor4s", 10, _gloffset_Color4s }, + { "glColor4sv", 10, _gloffset_Color4sv }, + { "glColor4ub", 10, _gloffset_Color4ub }, + { "glColor4ubv", 10, _gloffset_Color4ubv }, + { "glColor4ui", 10, _gloffset_Color4ui }, + { "glColor4uiv", 10, _gloffset_Color4uiv }, + { "glColor4us", 10, _gloffset_Color4us }, + { "glColor4usv", 10, _gloffset_Color4usv }, + { "glEdgeFlag", 10, _gloffset_EdgeFlag }, + { "glEdgeFlagv", 10, _gloffset_EdgeFlagv }, + { "glEnd", 10, _gloffset_End }, + { "glIndexd", 10, _gloffset_Indexd }, + { "glIndexdv", 10, _gloffset_Indexdv }, + { "glIndexf", 10, _gloffset_Indexf }, + { "glIndexfv", 10, _gloffset_Indexfv }, + { "glIndexi", 10, _gloffset_Indexi }, + { "glIndexiv", 10, _gloffset_Indexiv }, + { "glIndexs", 10, _gloffset_Indexs }, + { "glIndexsv", 10, _gloffset_Indexsv }, + { "glNormal3b", 10, _gloffset_Normal3b }, + { "glNormal3bv", 10, _gloffset_Normal3bv }, + { "glNormal3d", 10, _gloffset_Normal3d }, + { "glNormal3dv", 10, _gloffset_Normal3dv }, + { "glNormal3f", 10, _gloffset_Normal3f }, + { "glNormal3fv", 10, _gloffset_Normal3fv }, + { "glNormal3i", 10, _gloffset_Normal3i }, + { "glNormal3iv", 10, _gloffset_Normal3iv }, + { 
"glNormal3s", 10, _gloffset_Normal3s }, + { "glNormal3sv", 10, _gloffset_Normal3sv }, + { "glRasterPos2d", 10, _gloffset_RasterPos2d }, + { "glRasterPos2dv", 10, _gloffset_RasterPos2dv }, + { "glRasterPos2f", 10, _gloffset_RasterPos2f }, + { "glRasterPos2fv", 10, _gloffset_RasterPos2fv }, + { "glRasterPos2i", 10, _gloffset_RasterPos2i }, + { "glRasterPos2iv", 10, _gloffset_RasterPos2iv }, + { "glRasterPos2s", 10, _gloffset_RasterPos2s }, + { "glRasterPos2sv", 10, _gloffset_RasterPos2sv }, + { "glRasterPos3d", 10, _gloffset_RasterPos3d }, + { "glRasterPos3dv", 10, _gloffset_RasterPos3dv }, + { "glRasterPos3f", 10, _gloffset_RasterPos3f }, + { "glRasterPos3fv", 10, _gloffset_RasterPos3fv }, + { "glRasterPos3i", 10, _gloffset_RasterPos3i }, + { "glRasterPos3iv", 10, _gloffset_RasterPos3iv }, + { "glRasterPos3s", 10, _gloffset_RasterPos3s }, + { "glRasterPos3sv", 10, _gloffset_RasterPos3sv }, + { "glRasterPos4d", 10, _gloffset_RasterPos4d }, + { "glRasterPos4dv", 10, _gloffset_RasterPos4dv }, + { "glRasterPos4f", 10, _gloffset_RasterPos4f }, + { "glRasterPos4fv", 10, _gloffset_RasterPos4fv }, + { "glRasterPos4i", 10, _gloffset_RasterPos4i }, + { "glRasterPos4iv", 10, _gloffset_RasterPos4iv }, + { "glRasterPos4s", 10, _gloffset_RasterPos4s }, + { "glRasterPos4sv", 10, _gloffset_RasterPos4sv }, + { "glRectd", 10, _gloffset_Rectd }, + { "glRectdv", 10, _gloffset_Rectdv }, + { "glRectf", 10, _gloffset_Rectf }, + { "glRectfv", 10, _gloffset_Rectfv }, + { "glRecti", 10, _gloffset_Recti }, + { "glRectiv", 10, _gloffset_Rectiv }, + { "glRects", 10, _gloffset_Rects }, + { "glRectsv", 10, _gloffset_Rectsv }, + { "glTexCoord1d", 10, _gloffset_TexCoord1d }, + { "glTexCoord1dv", 10, _gloffset_TexCoord1dv }, + { "glTexCoord1f", 10, _gloffset_TexCoord1f }, + { "glTexCoord1fv", 10, _gloffset_TexCoord1fv }, + { "glTexCoord1i", 10, _gloffset_TexCoord1i }, + { "glTexCoord1iv", 10, _gloffset_TexCoord1iv }, + { "glTexCoord1s", 10, _gloffset_TexCoord1s }, + { "glTexCoord1sv", 10, 
_gloffset_TexCoord1sv }, + { "glTexCoord2d", 10, _gloffset_TexCoord2d }, + { "glTexCoord2dv", 10, _gloffset_TexCoord2dv }, + { "glTexCoord2f", 10, _gloffset_TexCoord2f }, + { "glTexCoord2fv", 10, _gloffset_TexCoord2fv }, + { "glTexCoord2i", 10, _gloffset_TexCoord2i }, + { "glTexCoord2iv", 10, _gloffset_TexCoord2iv }, + { "glTexCoord2s", 10, _gloffset_TexCoord2s }, + { "glTexCoord2sv", 10, _gloffset_TexCoord2sv }, + { "glTexCoord3d", 10, _gloffset_TexCoord3d }, + { "glTexCoord3dv", 10, _gloffset_TexCoord3dv }, + { "glTexCoord3f", 10, _gloffset_TexCoord3f }, + { "glTexCoord3fv", 10, _gloffset_TexCoord3fv }, + { "glTexCoord3i", 10, _gloffset_TexCoord3i }, + { "glTexCoord3iv", 10, _gloffset_TexCoord3iv }, + { "glTexCoord3s", 10, _gloffset_TexCoord3s }, + { "glTexCoord3sv", 10, _gloffset_TexCoord3sv }, + { "glTexCoord4d", 10, _gloffset_TexCoord4d }, + { "glTexCoord4dv", 10, _gloffset_TexCoord4dv }, + { "glTexCoord4f", 10, _gloffset_TexCoord4f }, + { "glTexCoord4fv", 10, _gloffset_TexCoord4fv }, + { "glTexCoord4i", 10, _gloffset_TexCoord4i }, + { "glTexCoord4iv", 10, _gloffset_TexCoord4iv }, + { "glTexCoord4s", 10, _gloffset_TexCoord4s }, + { "glTexCoord4sv", 10, _gloffset_TexCoord4sv }, + { "glVertex2d", 10, _gloffset_Vertex2d }, + { "glVertex2dv", 10, _gloffset_Vertex2dv }, + { "glVertex2f", 10, _gloffset_Vertex2f }, + { "glVertex2fv", 10, _gloffset_Vertex2fv }, + { "glVertex2i", 10, _gloffset_Vertex2i }, + { "glVertex2iv", 10, _gloffset_Vertex2iv }, + { "glVertex2s", 10, _gloffset_Vertex2s }, + { "glVertex2sv", 10, _gloffset_Vertex2sv }, + { "glVertex3d", 10, _gloffset_Vertex3d }, + { "glVertex3dv", 10, _gloffset_Vertex3dv }, + { "glVertex3f", 10, _gloffset_Vertex3f }, + { "glVertex3fv", 10, _gloffset_Vertex3fv }, + { "glVertex3i", 10, _gloffset_Vertex3i }, + { "glVertex3iv", 10, _gloffset_Vertex3iv }, + { "glVertex3s", 10, _gloffset_Vertex3s }, + { "glVertex3sv", 10, _gloffset_Vertex3sv }, + { "glVertex4d", 10, _gloffset_Vertex4d }, + { "glVertex4dv", 10, 
_gloffset_Vertex4dv }, + { "glVertex4f", 10, _gloffset_Vertex4f }, + { "glVertex4fv", 10, _gloffset_Vertex4fv }, + { "glVertex4i", 10, _gloffset_Vertex4i }, + { "glVertex4iv", 10, _gloffset_Vertex4iv }, + { "glVertex4s", 10, _gloffset_Vertex4s }, + { "glVertex4sv", 10, _gloffset_Vertex4sv }, + { "glClipPlane", 10, _gloffset_ClipPlane }, + { "glColorMaterial", 10, _gloffset_ColorMaterial }, + { "glFogf", 10, _gloffset_Fogf }, + { "glFogfv", 10, _gloffset_Fogfv }, + { "glFogi", 10, _gloffset_Fogi }, + { "glFogiv", 10, _gloffset_Fogiv }, + { "glLightf", 10, _gloffset_Lightf }, + { "glLightfv", 10, _gloffset_Lightfv }, + { "glLighti", 10, _gloffset_Lighti }, + { "glLightiv", 10, _gloffset_Lightiv }, + { "glLightModelf", 10, _gloffset_LightModelf }, + { "glLightModelfv", 10, _gloffset_LightModelfv }, + { "glLightModeli", 10, _gloffset_LightModeli }, + { "glLightModeliv", 10, _gloffset_LightModeliv }, + { "glLineStipple", 10, _gloffset_LineStipple }, + { "glMaterialf", 10, _gloffset_Materialf }, + { "glMaterialfv", 10, _gloffset_Materialfv }, + { "glMateriali", 10, _gloffset_Materiali }, + { "glMaterialiv", 10, _gloffset_Materialiv }, + { "glPolygonStipple", 10, _gloffset_PolygonStipple }, + { "glShadeModel", 10, _gloffset_ShadeModel }, + { "glTexEnvf", 10, _gloffset_TexEnvf }, + { "glTexEnvfv", 10, _gloffset_TexEnvfv }, + { "glTexEnvi", 10, _gloffset_TexEnvi }, + { "glTexEnviv", 10, _gloffset_TexEnviv }, + { "glTexGend", 10, _gloffset_TexGend }, + { "glTexGendv", 10, _gloffset_TexGendv }, + { "glTexGenf", 10, _gloffset_TexGenf }, + { "glTexGenfv", 10, _gloffset_TexGenfv }, + { "glTexGeni", 10, _gloffset_TexGeni }, + { "glTexGeniv", 10, _gloffset_TexGeniv }, + { "glFeedbackBuffer", 10, _gloffset_FeedbackBuffer }, + { "glSelectBuffer", 10, _gloffset_SelectBuffer }, + { "glRenderMode", 10, _gloffset_RenderMode }, + { "glInitNames", 10, _gloffset_InitNames }, + { "glLoadName", 10, _gloffset_LoadName }, + { "glPassThrough", 10, _gloffset_PassThrough }, + { "glPopName", 10, 
_gloffset_PopName }, + { "glPushName", 10, _gloffset_PushName }, + { "glClearAccum", 10, _gloffset_ClearAccum }, + { "glClearIndex", 10, _gloffset_ClearIndex }, + { "glIndexMask", 10, _gloffset_IndexMask }, + { "glAccum", 10, _gloffset_Accum }, + { "glPopAttrib", 10, _gloffset_PopAttrib }, + { "glPushAttrib", 10, _gloffset_PushAttrib }, + { "glMap1d", 10, _gloffset_Map1d }, + { "glMap1f", 10, _gloffset_Map1f }, + { "glMap2d", 10, _gloffset_Map2d }, + { "glMap2f", 10, _gloffset_Map2f }, + { "glMapGrid1d", 10, _gloffset_MapGrid1d }, + { "glMapGrid1f", 10, _gloffset_MapGrid1f }, + { "glMapGrid2d", 10, _gloffset_MapGrid2d }, + { "glMapGrid2f", 10, _gloffset_MapGrid2f }, + { "glEvalCoord1d", 10, _gloffset_EvalCoord1d }, + { "glEvalCoord1dv", 10, _gloffset_EvalCoord1dv }, + { "glEvalCoord1f", 10, _gloffset_EvalCoord1f }, + { "glEvalCoord1fv", 10, _gloffset_EvalCoord1fv }, + { "glEvalCoord2d", 10, _gloffset_EvalCoord2d }, + { "glEvalCoord2dv", 10, _gloffset_EvalCoord2dv }, + { "glEvalCoord2f", 10, _gloffset_EvalCoord2f }, + { "glEvalCoord2fv", 10, _gloffset_EvalCoord2fv }, + { "glEvalMesh1", 10, _gloffset_EvalMesh1 }, + { "glEvalPoint1", 10, _gloffset_EvalPoint1 }, + { "glEvalMesh2", 10, _gloffset_EvalMesh2 }, + { "glEvalPoint2", 10, _gloffset_EvalPoint2 }, + { "glAlphaFunc", 10, _gloffset_AlphaFunc }, + { "glPixelZoom", 10, _gloffset_PixelZoom }, + { "glPixelTransferf", 10, _gloffset_PixelTransferf }, + { "glPixelTransferi", 10, _gloffset_PixelTransferi }, + { "glPixelMapfv", 10, _gloffset_PixelMapfv }, + { "glPixelMapuiv", 10, _gloffset_PixelMapuiv }, + { "glPixelMapusv", 10, _gloffset_PixelMapusv }, + { "glCopyPixels", 10, _gloffset_CopyPixels }, + { "glDrawPixels", 10, _gloffset_DrawPixels }, + { "glGetClipPlane", 10, _gloffset_GetClipPlane }, + { "glGetLightfv", 10, _gloffset_GetLightfv }, + { "glGetLightiv", 10, _gloffset_GetLightiv }, + { "glGetMapdv", 10, _gloffset_GetMapdv }, + { "glGetMapfv", 10, _gloffset_GetMapfv }, + { "glGetMapiv", 10, _gloffset_GetMapiv }, 
+ { "glGetMaterialfv", 10, _gloffset_GetMaterialfv }, + { "glGetMaterialiv", 10, _gloffset_GetMaterialiv }, + { "glGetPixelMapfv", 10, _gloffset_GetPixelMapfv }, + { "glGetPixelMapuiv", 10, _gloffset_GetPixelMapuiv }, + { "glGetPixelMapusv", 10, _gloffset_GetPixelMapusv }, + { "glGetPolygonStipple", 10, _gloffset_GetPolygonStipple }, + { "glGetTexEnvfv", 10, _gloffset_GetTexEnvfv }, + { "glGetTexEnviv", 10, _gloffset_GetTexEnviv }, + { "glGetTexGendv", 10, _gloffset_GetTexGendv }, + { "glGetTexGenfv", 10, _gloffset_GetTexGenfv }, + { "glGetTexGeniv", 10, _gloffset_GetTexGeniv }, + { "glIsList", 10, _gloffset_IsList }, + { "glFrustum", 10, _gloffset_Frustum }, + { "glLoadIdentity", 10, _gloffset_LoadIdentity }, + { "glLoadMatrixf", 10, _gloffset_LoadMatrixf }, + { "glLoadMatrixd", 10, _gloffset_LoadMatrixd }, + { "glMatrixMode", 10, _gloffset_MatrixMode }, + { "glMultMatrixf", 10, _gloffset_MultMatrixf }, + { "glMultMatrixd", 10, _gloffset_MultMatrixd }, + { "glOrtho", 10, _gloffset_Ortho }, + { "glPopMatrix", 10, _gloffset_PopMatrix }, + { "glPushMatrix", 10, _gloffset_PushMatrix }, + { "glRotated", 10, _gloffset_Rotated }, + { "glRotatef", 10, _gloffset_Rotatef }, + { "glScaled", 10, _gloffset_Scaled }, + { "glScalef", 10, _gloffset_Scalef }, + { "glTranslated", 10, _gloffset_Translated }, + { "glTranslatef", 10, _gloffset_Translatef }, + { "glArrayElement", 10, _gloffset_ArrayElement }, + { "glColorPointer", 10, _gloffset_ColorPointer }, + { "glDisableClientState", 10, _gloffset_DisableClientState }, + { "glEdgeFlagPointer", 10, _gloffset_EdgeFlagPointer }, + { "glEnableClientState", 10, _gloffset_EnableClientState }, + { "glIndexPointer", 10, _gloffset_IndexPointer }, + { "glInterleavedArrays", 10, _gloffset_InterleavedArrays }, + { "glNormalPointer", 10, _gloffset_NormalPointer }, + { "glTexCoordPointer", 10, _gloffset_TexCoordPointer }, + { "glVertexPointer", 10, _gloffset_VertexPointer }, + { "glAreTexturesResident", 10, _gloffset_AreTexturesResident }, + { 
"glPrioritizeTextures", 10, _gloffset_PrioritizeTextures }, + { "glIndexub", 10, _gloffset_Indexub }, + { "glIndexubv", 10, _gloffset_Indexubv }, + { "glPopClientAttrib", 10, _gloffset_PopClientAttrib }, + { "glPushClientAttrib", 10, _gloffset_PushClientAttrib }, + { "glColorTable", 10, _gloffset_ColorTable }, + { "glColorTableParameterfv", 10, _gloffset_ColorTableParameterfv }, + { "glColorTableParameteriv", 10, _gloffset_ColorTableParameteriv }, + { "glCopyColorTable", 10, _gloffset_CopyColorTable }, + { "glGetColorTable", 10, _gloffset_GetColorTable }, + { "glGetColorTableParameterfv", 10, _gloffset_GetColorTableParameterfv }, + { "glGetColorTableParameteriv", 10, _gloffset_GetColorTableParameteriv }, + { "glColorSubTable", 10, _gloffset_ColorSubTable }, + { "glCopyColorSubTable", 10, _gloffset_CopyColorSubTable }, + { "glConvolutionFilter1D", 10, _gloffset_ConvolutionFilter1D }, + { "glConvolutionFilter2D", 10, _gloffset_ConvolutionFilter2D }, + { "glConvolutionParameterf", 10, _gloffset_ConvolutionParameterf }, + { "glConvolutionParameterfv", 10, _gloffset_ConvolutionParameterfv }, + { "glConvolutionParameteri", 10, _gloffset_ConvolutionParameteri }, + { "glConvolutionParameteriv", 10, _gloffset_ConvolutionParameteriv }, + { "glCopyConvolutionFilter1D", 10, _gloffset_CopyConvolutionFilter1D }, + { "glCopyConvolutionFilter2D", 10, _gloffset_CopyConvolutionFilter2D }, + { "glGetConvolutionFilter", 10, _gloffset_GetConvolutionFilter }, + { "glGetConvolutionParameterfv", 10, _gloffset_GetConvolutionParameterfv }, + { "glGetConvolutionParameteriv", 10, _gloffset_GetConvolutionParameteriv }, + { "glGetSeparableFilter", 10, _gloffset_GetSeparableFilter }, + { "glSeparableFilter2D", 10, _gloffset_SeparableFilter2D }, + { "glGetHistogram", 10, _gloffset_GetHistogram }, + { "glGetHistogramParameterfv", 10, _gloffset_GetHistogramParameterfv }, + { "glGetHistogramParameteriv", 10, _gloffset_GetHistogramParameteriv }, + { "glGetMinmax", 10, _gloffset_GetMinmax }, + { 
"glGetMinmaxParameterfv", 10, _gloffset_GetMinmaxParameterfv }, + { "glGetMinmaxParameteriv", 10, _gloffset_GetMinmaxParameteriv }, + { "glHistogram", 10, _gloffset_Histogram }, + { "glMinmax", 10, _gloffset_Minmax }, + { "glResetHistogram", 10, _gloffset_ResetHistogram }, + { "glResetMinmax", 10, _gloffset_ResetMinmax }, + { "glClientActiveTexture", 10, _gloffset_ClientActiveTexture }, + { "glMultiTexCoord1d", 10, _gloffset_MultiTexCoord1d }, + { "glMultiTexCoord1dv", 10, _gloffset_MultiTexCoord1dv }, + { "glMultiTexCoord1f", 10, _gloffset_MultiTexCoord1fARB }, + { "glMultiTexCoord1fv", 10, _gloffset_MultiTexCoord1fvARB }, + { "glMultiTexCoord1i", 10, _gloffset_MultiTexCoord1i }, + { "glMultiTexCoord1iv", 10, _gloffset_MultiTexCoord1iv }, + { "glMultiTexCoord1s", 10, _gloffset_MultiTexCoord1s }, + { "glMultiTexCoord1sv", 10, _gloffset_MultiTexCoord1sv }, + { "glMultiTexCoord2d", 10, _gloffset_MultiTexCoord2d }, + { "glMultiTexCoord2dv", 10, _gloffset_MultiTexCoord2dv }, + { "glMultiTexCoord2f", 10, _gloffset_MultiTexCoord2fARB }, + { "glMultiTexCoord2fv", 10, _gloffset_MultiTexCoord2fvARB }, + { "glMultiTexCoord2i", 10, _gloffset_MultiTexCoord2i }, + { "glMultiTexCoord2iv", 10, _gloffset_MultiTexCoord2iv }, + { "glMultiTexCoord2s", 10, _gloffset_MultiTexCoord2s }, + { "glMultiTexCoord2sv", 10, _gloffset_MultiTexCoord2sv }, + { "glMultiTexCoord3d", 10, _gloffset_MultiTexCoord3d }, + { "glMultiTexCoord3dv", 10, _gloffset_MultiTexCoord3dv }, + { "glMultiTexCoord3f", 10, _gloffset_MultiTexCoord3fARB }, + { "glMultiTexCoord3fv", 10, _gloffset_MultiTexCoord3fvARB }, + { "glMultiTexCoord3i", 10, _gloffset_MultiTexCoord3i }, + { "glMultiTexCoord3iv", 10, _gloffset_MultiTexCoord3iv }, + { "glMultiTexCoord3s", 10, _gloffset_MultiTexCoord3s }, + { "glMultiTexCoord3sv", 10, _gloffset_MultiTexCoord3sv }, + { "glMultiTexCoord4d", 10, _gloffset_MultiTexCoord4d }, + { "glMultiTexCoord4dv", 10, _gloffset_MultiTexCoord4dv }, + { "glMultiTexCoord4f", 10, _gloffset_MultiTexCoord4fARB 
}, + { "glMultiTexCoord4fv", 10, _gloffset_MultiTexCoord4fvARB }, + { "glMultiTexCoord4i", 10, _gloffset_MultiTexCoord4i }, + { "glMultiTexCoord4iv", 10, _gloffset_MultiTexCoord4iv }, + { "glMultiTexCoord4s", 10, _gloffset_MultiTexCoord4s }, + { "glMultiTexCoord4sv", 10, _gloffset_MultiTexCoord4sv }, + { "glLoadTransposeMatrixf", 10, -1 }, + { "glLoadTransposeMatrixd", 10, -1 }, + { "glMultTransposeMatrixf", 10, -1 }, + { "glMultTransposeMatrixd", 10, -1 }, + { "glFogCoordf", 10, -1 }, + { "glFogCoordfv", 10, -1 }, + { "glFogCoordd", 10, -1 }, + { "glFogCoorddv", 10, -1 }, + { "glFogCoordPointer", 10, -1 }, + { "glSecondaryColor3b", 10, -1 }, + { "glSecondaryColor3bv", 10, -1 }, + { "glSecondaryColor3d", 10, -1 }, + { "glSecondaryColor3dv", 10, -1 }, + { "glSecondaryColor3f", 10, -1 }, + { "glSecondaryColor3fv", 10, -1 }, + { "glSecondaryColor3i", 10, -1 }, + { "glSecondaryColor3iv", 10, -1 }, + { "glSecondaryColor3s", 10, -1 }, + { "glSecondaryColor3sv", 10, -1 }, + { "glSecondaryColor3ub", 10, -1 }, + { "glSecondaryColor3ubv", 10, -1 }, + { "glSecondaryColor3ui", 10, -1 }, + { "glSecondaryColor3uiv", 10, -1 }, + { "glSecondaryColor3us", 10, -1 }, + { "glSecondaryColor3usv", 10, -1 }, + { "glSecondaryColorPointer", 10, -1 }, + { "glWindowPos2d", 10, -1 }, + { "glWindowPos2dv", 10, -1 }, + { "glWindowPos2f", 10, -1 }, + { "glWindowPos2fv", 10, -1 }, + { "glWindowPos2i", 10, -1 }, + { "glWindowPos2iv", 10, -1 }, + { "glWindowPos2s", 10, -1 }, + { "glWindowPos2sv", 10, -1 }, + { "glWindowPos3d", 10, -1 }, + { "glWindowPos3dv", 10, -1 }, + { "glWindowPos3f", 10, -1 }, + { "glWindowPos3fv", 10, -1 }, + { "glWindowPos3i", 10, -1 }, + { "glWindowPos3iv", 10, -1 }, + { "glWindowPos3s", 10, -1 }, + { "glWindowPos3sv", 10, -1 }, + { "glProgramStringARB", 10, -1 }, + { "glProgramEnvParameter4dARB", 10, -1 }, + { "glProgramEnvParameter4dvARB", 10, -1 }, + { "glProgramEnvParameter4fARB", 10, -1 }, + { "glProgramEnvParameter4fvARB", 10, -1 }, + { "glProgramLocalParameter4dARB", 
10, -1 }, + { "glProgramLocalParameter4dvARB", 10, -1 }, + { "glProgramLocalParameter4fARB", 10, -1 }, + { "glProgramLocalParameter4fvARB", 10, -1 }, + { "glGetProgramEnvParameterdvARB", 10, -1 }, + { "glGetProgramEnvParameterfvARB", 10, -1 }, + { "glGetProgramLocalParameterdvARB", 10, -1 }, + { "glGetProgramLocalParameterfvARB", 10, -1 }, + { "glGetProgramivARB", 10, -1 }, + { "glGetProgramStringARB", 10, -1 }, + { "glPolygonOffsetEXT", 10, -1 }, + { "glColorPointerEXT", 10, -1 }, + { "glEdgeFlagPointerEXT", 10, -1 }, + { "glIndexPointerEXT", 10, -1 }, + { "glNormalPointerEXT", 10, -1 }, + { "glTexCoordPointerEXT", 10, -1 }, + { "glVertexPointerEXT", 10, -1 }, + { "glLockArraysEXT", 10, -1 }, + { "glUnlockArraysEXT", 10, -1 }, + { "glWindowPos4dMESA", 10, -1 }, + { "glWindowPos4dvMESA", 10, -1 }, + { "glWindowPos4fMESA", 10, -1 }, + { "glWindowPos4fvMESA", 10, -1 }, + { "glWindowPos4iMESA", 10, -1 }, + { "glWindowPos4ivMESA", 10, -1 }, + { "glWindowPos4sMESA", 10, -1 }, + { "glWindowPos4svMESA", 10, -1 }, + { "glBindProgramNV", 10, -1 }, + { "glDeleteProgramsNV", 10, -1 }, + { "glGenProgramsNV", 10, -1 }, + { "glIsProgramNV", 10, -1 }, + { "glVertexAttrib1sNV", 10, -1 }, + { "glVertexAttrib1svNV", 10, -1 }, + { "glVertexAttrib2sNV", 10, -1 }, + { "glVertexAttrib2svNV", 10, -1 }, + { "glVertexAttrib3sNV", 10, -1 }, + { "glVertexAttrib3svNV", 10, -1 }, + { "glVertexAttrib4sNV", 10, -1 }, + { "glVertexAttrib4svNV", 10, -1 }, + { "glVertexAttrib1fNV", 10, -1 }, + { "glVertexAttrib1fvNV", 10, -1 }, + { "glVertexAttrib2fNV", 10, -1 }, + { "glVertexAttrib2fvNV", 10, -1 }, + { "glVertexAttrib3fNV", 10, -1 }, + { "glVertexAttrib3fvNV", 10, -1 }, + { "glVertexAttrib4fNV", 10, -1 }, + { "glVertexAttrib4fvNV", 10, -1 }, + { "glVertexAttrib1dNV", 10, -1 }, + { "glVertexAttrib1dvNV", 10, -1 }, + { "glVertexAttrib2dNV", 10, -1 }, + { "glVertexAttrib2dvNV", 10, -1 }, + { "glVertexAttrib3dNV", 10, -1 }, + { "glVertexAttrib3dvNV", 10, -1 }, + { "glVertexAttrib4dNV", 10, -1 }, + { 
"glVertexAttrib4dvNV", 10, -1 }, + { "glVertexAttrib4ubNV", 10, -1 }, + { "glVertexAttrib4ubvNV", 10, -1 }, + { "glVertexAttribs1svNV", 10, -1 }, + { "glVertexAttribs2svNV", 10, -1 }, + { "glVertexAttribs3svNV", 10, -1 }, + { "glVertexAttribs4svNV", 10, -1 }, + { "glVertexAttribs1fvNV", 10, -1 }, + { "glVertexAttribs2fvNV", 10, -1 }, + { "glVertexAttribs3fvNV", 10, -1 }, + { "glVertexAttribs4fvNV", 10, -1 }, + { "glVertexAttribs1dvNV", 10, -1 }, + { "glVertexAttribs2dvNV", 10, -1 }, + { "glVertexAttribs3dvNV", 10, -1 }, + { "glVertexAttribs4dvNV", 10, -1 }, + { "glVertexAttribs4ubvNV", 10, -1 }, + { "glGenFragmentShadersATI", 10, -1 }, + { "glBindFragmentShaderATI", 10, -1 }, + { "glDeleteFragmentShaderATI", 10, -1 }, + { "glBeginFragmentShaderATI", 10, -1 }, + { "glEndFragmentShaderATI", 10, -1 }, + { "glPassTexCoordATI", 10, -1 }, + { "glSampleMapATI", 10, -1 }, + { "glColorFragmentOp1ATI", 10, -1 }, + { "glColorFragmentOp2ATI", 10, -1 }, + { "glColorFragmentOp3ATI", 10, -1 }, + { "glAlphaFragmentOp1ATI", 10, -1 }, + { "glAlphaFragmentOp2ATI", 10, -1 }, + { "glAlphaFragmentOp3ATI", 10, -1 }, + { "glSetFragmentShaderConstantATI", 10, -1 }, + { "glActiveStencilFaceEXT", 10, -1 }, + { "glStencilFuncSeparateATI", 10, -1 }, + { "glProgramEnvParameters4fvEXT", 10, -1 }, + { "glProgramLocalParameters4fvEXT", 10, -1 }, + { "glPrimitiveRestartNV", 10, -1 }, + + { NULL, 0, -1 } +}; + const struct function gl_core_functions_possible[] = { /* GL 3.1 */ { "glTexBuffer", 31, -1 }, From 1ca60de4c00e864bffbee8265f631b2267c8ea29 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 May 2015 13:56:45 -0700 Subject: [PATCH 356/834] mesa: Enable ARB_direct_state_access by default for core profile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And core profile only. 
Signed-off-by: Ian Romanick Reviewed-by: Fredrik Höglund Cc: "10.6" --- src/mesa/main/extensions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index f7ce0642aef..f9bf503a066 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -104,7 +104,7 @@ static const struct extension extension_table[] = { { "GL_ARB_depth_clamp", o(ARB_depth_clamp), GL, 2003 }, { "GL_ARB_depth_texture", o(ARB_depth_texture), GLL, 2001 }, { "GL_ARB_derivative_control", o(ARB_derivative_control), GL, 2014 }, - { "GL_ARB_direct_state_access", o(dummy_false), GL, 2014 }, + { "GL_ARB_direct_state_access", o(dummy_true), GLC, 2014 }, { "GL_ARB_draw_buffers", o(dummy_true), GL, 2002 }, { "GL_ARB_draw_buffers_blend", o(ARB_draw_buffers_blend), GL, 2009 }, { "GL_ARB_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), GL, 2009 }, From 99cb4233205edcfa1a1e2967eef7bb16ff19bec4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 May 2015 09:44:01 -0700 Subject: [PATCH 357/834] i965: Rename backend_visitor to backend_shader The backend_shader class really is a representation of a shader. The fact that it inherits from ir_visitor is somewhat immaterial. 
Reviewed-by: Matt Turner Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_cfg.cpp | 10 +++++----- src/mesa/drivers/dri/i965/brw_cfg.h | 4 ++-- .../dri/i965/brw_dead_control_flow.cpp | 6 +++--- .../drivers/dri/i965/brw_dead_control_flow.h | 2 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +- .../dri/i965/brw_schedule_instructions.cpp | 18 ++++++++--------- src/mesa/drivers/dri/i965/brw_shader.cpp | 20 +++++++++---------- src/mesa/drivers/dri/i965/brw_shader.h | 12 +++++------ src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_vec4.h | 2 +- .../drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- 13 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp index 7e7770e43cd..39c419b9b96 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp @@ -141,12 +141,12 @@ bblock_t::combine_with(bblock_t *that) } void -bblock_t::dump(backend_visitor *v) const +bblock_t::dump(backend_shader *s) const { int ip = this->start_ip; foreach_inst_in_block(backend_instruction, inst, this) { fprintf(stderr, "%5d: ", ip); - v->dump_instruction(inst); + s->dump_instruction(inst); ip++; } } @@ -411,7 +411,7 @@ cfg_t::make_block_array() } void -cfg_t::dump(backend_visitor *v) +cfg_t::dump(backend_shader *s) { if (idom_dirty) calculate_idom(); @@ -423,8 +423,8 @@ cfg_t::dump(backend_visitor *v) link->block->num); } fprintf(stderr, "\n"); - if (v != NULL) - block->dump(v); + if (s != NULL) + block->dump(s); fprintf(stderr, "END B%d", block->num); foreach_list_typed(bblock_link, link, link, &block->children) { fprintf(stderr, " ->B%d", diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h index 56d7d07abdf..a09491781e6 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.h +++ b/src/mesa/drivers/dri/i965/brw_cfg.h @@ -60,7 
+60,7 @@ struct bblock_t { bool is_successor_of(const bblock_t *block) const; bool can_combine_with(const bblock_t *that) const; void combine_with(bblock_t *that); - void dump(backend_visitor *v) const; + void dump(backend_shader *s) const; backend_instruction *start(); const backend_instruction *start() const; @@ -273,7 +273,7 @@ struct cfg_t { void calculate_idom(); static bblock_t *intersect(bblock_t *b1, bblock_t *b2); - void dump(backend_visitor *v); + void dump(backend_shader *s); void dump_cfg(); void dump_domtree(); #endif diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp index 03f838dd9ae..61f25811cb2 100644 --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp @@ -36,11 +36,11 @@ * - if/else/endif */ bool -dead_control_flow_eliminate(backend_visitor *v) +dead_control_flow_eliminate(backend_shader *s) { bool progress = false; - foreach_block_safe (block, v->cfg) { + foreach_block_safe (block, s->cfg) { bblock_t *if_block = NULL, *else_block = NULL, *endif_block = block; bool found = false; @@ -115,7 +115,7 @@ dead_control_flow_eliminate(backend_visitor *v) } if (progress) - v->invalidate_live_intervals(); + s->invalidate_live_intervals(); return progress; } diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.h b/src/mesa/drivers/dri/i965/brw_dead_control_flow.h index 57a4dabc83c..83fd9b1e79e 100644 --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.h +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.h @@ -23,4 +23,4 @@ #include "brw_shader.h" -bool dead_control_flow_eliminate(backend_visitor *v); +bool dead_control_flow_eliminate(backend_shader *s); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5cc4fe66e99..aebde5d5f34 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4100,7 +4100,7 @@ fs_visitor::optimize() 
snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass, \ stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ \ - backend_visitor::dump_instructions(filename); \ + backend_shader::dump_instructions(filename); \ } \ \ progress = progress || this_progress; \ @@ -4113,7 +4113,7 @@ fs_visitor::optimize() stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0); - backend_visitor::dump_instructions(filename); + backend_shader::dump_instructions(filename); } bool progress; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index f2aa0ae9576..f63b149dfcf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -66,7 +66,7 @@ namespace brw { * * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR. */ -class fs_visitor : public backend_visitor +class fs_visitor : public backend_shader { public: const fs_reg reg_null_f; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 314136c7819..7553c3542fd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -4125,7 +4125,7 @@ fs_visitor::fs_visitor(struct brw_context *brw, struct gl_shader_program *shader_prog, struct gl_program *prog, unsigned dispatch_width) - : backend_visitor(brw, shader_prog, prog, prog_data, stage), + : backend_shader(brw, shader_prog, prog, prog_data, stage), reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)), reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 34f75fdd814..ee0add5d765 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ 
-399,10 +399,10 @@ schedule_node::set_latency_gen7(bool is_haswell) class instruction_scheduler { public: - instruction_scheduler(backend_visitor *v, int grf_count, + instruction_scheduler(backend_shader *s, int grf_count, instruction_scheduler_mode mode) { - this->bv = v; + this->bs = s; this->mem_ctx = ralloc_context(NULL); this->grf_count = grf_count; this->instructions.make_empty(); @@ -455,7 +455,7 @@ public: int grf_count; int time; exec_list instructions; - backend_visitor *bv; + backend_shader *bs; instruction_scheduler_mode mode; @@ -606,7 +606,7 @@ vec4_instruction_scheduler::get_register_pressure_benefit(backend_instruction *b schedule_node::schedule_node(backend_instruction *inst, instruction_scheduler *sched) { - const struct brw_device_info *devinfo = sched->bv->devinfo; + const struct brw_device_info *devinfo = sched->bs->devinfo; this->inst = inst; this->child_array_size = 0; @@ -1384,7 +1384,7 @@ vec4_instruction_scheduler::issue_time(backend_instruction *inst) void instruction_scheduler::schedule_instructions(bblock_t *block) { - const struct brw_device_info *devinfo = bv->devinfo; + const struct brw_device_info *devinfo = bs->devinfo; backend_instruction *inst = block->end(); time = 0; @@ -1419,7 +1419,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block) if (debug) { fprintf(stderr, "clock %4d, scheduled: ", time); - bv->dump_instruction(chosen->inst); + bs->dump_instruction(chosen->inst); } /* Now that we've scheduled a new instruction, some of its @@ -1435,7 +1435,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block) if (debug) { fprintf(stderr, "\tchild %d, %d parents: ", i, child->parent_count); - bv->dump_instruction(child->inst); + bs->dump_instruction(child->inst); } child->cand_generation = cand_generation; @@ -1474,7 +1474,7 @@ instruction_scheduler::run(cfg_t *cfg) if (debug) { fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n", post_reg_alloc); - bv->dump_instructions(); + 
bs->dump_instructions(); } /* Populate the remaining GRF uses array to improve the pre-regalloc @@ -1504,7 +1504,7 @@ instruction_scheduler::run(cfg_t *cfg) if (debug) { fprintf(stderr, "\nInstructions after scheduling (reg_alloc %d)\n", post_reg_alloc); - bv->dump_instructions(); + bs->dump_instructions(); } } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index c1fd859fef5..6222d5258de 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -754,11 +754,11 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) return false; } -backend_visitor::backend_visitor(struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - gl_shader_stage stage) +backend_shader::backend_shader(struct brw_context *brw, + struct gl_shader_program *shader_prog, + struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + gl_shader_stage stage) : brw(brw), devinfo(brw->intelScreen->devinfo), ctx(&brw->ctx), @@ -1147,13 +1147,13 @@ backend_instruction::remove(bblock_t *block) } void -backend_visitor::dump_instructions() +backend_shader::dump_instructions() { dump_instructions(NULL); } void -backend_visitor::dump_instructions(const char *name) +backend_shader::dump_instructions(const char *name) { FILE *file = stderr; if (name && geteuid() != 0) { @@ -1182,7 +1182,7 @@ backend_visitor::dump_instructions(const char *name) } void -backend_visitor::calculate_cfg() +backend_shader::calculate_cfg() { if (this->cfg) return; @@ -1190,7 +1190,7 @@ backend_visitor::calculate_cfg() } void -backend_visitor::invalidate_cfg() +backend_shader::invalidate_cfg() { ralloc_free(this->cfg); this->cfg = NULL; @@ -1205,7 +1205,7 @@ backend_visitor::invalidate_cfg() * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. 
*/ void -backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset) +backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_offset) { int num_textures = _mesa_fls(prog->SamplersUsed); diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 59a0eff824e..ac9acfc8cd9 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -211,14 +211,14 @@ enum instruction_scheduler_mode { SCHEDULE_POST, }; -class backend_visitor : public ir_visitor { +class backend_shader : public ir_visitor { protected: - backend_visitor(struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - gl_shader_stage stage); + backend_shader(struct brw_context *brw, + struct gl_shader_program *shader_prog, + struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + gl_shader_stage stage); public: diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index e9681b73343..02a7e33f791 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1768,7 +1768,7 @@ vec4_visitor::run() snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \ stage_abbrev, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ \ - backend_visitor::dump_instructions(filename); \ + backend_shader::dump_instructions(filename); \ } \ \ progress = progress || this_progress; \ @@ -1781,7 +1781,7 @@ vec4_visitor::run() snprintf(filename, 64, "%s-%04d-00-start", stage_abbrev, shader_prog ? 
shader_prog->Name : 0); - backend_visitor::dump_instructions(filename); + backend_shader::dump_instructions(filename); } bool progress; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 628c6313cc9..f78134339ea 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -73,7 +73,7 @@ class vec4_live_variables; * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and * fixed-function) into VS IR. */ -class vec4_visitor : public backend_visitor +class vec4_visitor : public backend_shader { public: vec4_visitor(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 59a73a95fc2..d3347ecce38 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -3682,7 +3682,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, shader_time_shader_type st_base, shader_time_shader_type st_written, shader_time_shader_type st_reset) - : backend_visitor(brw, shader_prog, prog, &prog_data->base, stage), + : backend_shader(brw, shader_prog, prog, &prog_data->base, stage), c(c), key(key), prog_data(prog_data), From 8b9ecfff360711cffc41a0a062de5ad810f9cf2b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 May 2015 09:45:47 -0700 Subject: [PATCH 358/834] i965: Make fs/vec4_visitor inherit from ir_visitor directly This is using multiple inheritance in C++. However, ir_visitor is really just an interface with no data so it shouldn't be so bad. 
Reviewed-by: Matt Turner Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_shader.h | 2 +- src/mesa/drivers/dri/i965/brw_vec4.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index f63b149dfcf..6f16c3140ac 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -66,7 +66,7 @@ namespace brw { * * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR. */ -class fs_visitor : public backend_shader +class fs_visitor : public backend_shader, public ir_visitor { public: const fs_reg reg_null_f; diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index ac9acfc8cd9..da01d2f7185 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -211,7 +211,7 @@ enum instruction_scheduler_mode { SCHEDULE_POST, }; -class backend_shader : public ir_visitor { +class backend_shader { protected: backend_shader(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index f78134339ea..06a16a49b6f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -73,7 +73,7 @@ class vec4_live_variables; * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and * fixed-function) into VS IR. 
*/ -class vec4_visitor : public backend_shader +class vec4_visitor : public backend_shader, public ir_visitor { public: vec4_visitor(struct brw_context *brw, From 114497afff4e49139b8c7d61f11a7872b81398bf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 May 2015 10:03:50 -0700 Subject: [PATCH 359/834] i965: Make NIR non-optional for scalar shaders Reviewed-by: Matt Turner Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.c | 7 ++----- src/mesa/drivers/dri/i965/brw_fs.cpp | 25 +++---------------------- 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index ea56859707b..274a2379437 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -590,13 +590,10 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false; - if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options; + ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options; } - if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) - ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions = &nir_options; - + ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions = &nir_options; ctx->Const.ShaderCompilerOptions[MESA_SHADER_COMPUTE].NirOptions = &nir_options; /* ARB_viewport_array */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index aebde5d5f34..cc76e4ad94e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4255,16 +4255,7 @@ fs_visitor::run_vs() if (INTEL_DEBUG & DEBUG_SHADER_TIME) emit_shader_time_begin(); - if (brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) { - 
emit_nir_code(); - } else { - foreach_in_list(ir_instruction, ir, shader->base.ir) { - base_ir = ir; - this->result = reg_undef; - ir->accept(this); - } - base_ir = NULL; - } + emit_nir_code(); if (failed) return false; @@ -4331,18 +4322,8 @@ fs_visitor::run_fs() /* Generate FS IR for main(). (the visitor only descends into * functions called "main"). */ - if (brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) { - emit_nir_code(); - } else if (shader) { - foreach_in_list(ir_instruction, ir, shader->base.ir) { - base_ir = ir; - this->result = reg_undef; - ir->accept(this); - } - } else { - emit_fragment_program_code(); - } - base_ir = NULL; + emit_nir_code(); + if (failed) return false; From 66a03a4c4bb416a30b65e0334b248660a268c6a8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 May 2015 10:39:03 -0700 Subject: [PATCH 360/834] i965: Remove the old fragment program code Now that everything is running through NIR, this is all dead. Reviewed-by: Matt Turner Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 1 - src/mesa/drivers/dri/i965/brw_fs.h | 26 - src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 742 --------------------- 3 files changed, 769 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/brw_fs_fp.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index ad73e6b0fa2..3f852cd21ec 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -49,7 +49,6 @@ i965_FILES = \ brw_fs.cpp \ brw_fs_cse.cpp \ brw_fs_dead_code_eliminate.cpp \ - brw_fs_fp.cpp \ brw_fs_generator.cpp \ brw_fs.h \ brw_fs_live_variables.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 6f16c3140ac..63414eb2831 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -332,29 +332,6 @@ public: void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg, 
uint32_t spill_offset, int count); - void emit_fragment_program_code(); - void setup_fp_regs(); - fs_reg get_fp_src_reg(const prog_src_register *src); - fs_reg get_fp_dst_reg(const prog_dst_register *dst); - void emit_fp_alu1(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src); - void emit_fp_alu2(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1); - void emit_fp_scalar_write(const struct prog_instruction *fpi, - fs_reg dst, fs_reg src); - void emit_fp_scalar_math(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src); - - void emit_fp_minmax(const struct prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1); - - void emit_fp_sop(enum brw_conditional_mod conditional_mod, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one); - void emit_nir_code(); void nir_setup_inputs(nir_shader *shader); void nir_setup_outputs(nir_shader *shader); @@ -472,9 +449,6 @@ public: /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */ unsigned max_grf; - fs_reg *fp_temp_regs; - fs_reg *fp_input_regs; - fs_reg *nir_locals; fs_reg *nir_globals; fs_reg nir_inputs; diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp deleted file mode 100644 index 6518ff60c3b..00000000000 --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp +++ /dev/null @@ -1,742 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice 
(including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** @file brw_fs_fp.cpp - * - * Implementation of the compiler for GL_ARB_fragment_program shaders on top - * of the GLSL compiler backend. - */ - -#include "brw_context.h" -#include "brw_fs.h" - -void -fs_visitor::emit_fp_alu1(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src) -{ - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) - emit(opcode, offset(dst, i), offset(src, i)); - } -} - -void -fs_visitor::emit_fp_alu2(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1) -{ - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) - emit(opcode, offset(dst, i), - offset(src0, i), offset(src1, i)); - } -} - -void -fs_visitor::emit_fp_minmax(const prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1) -{ - enum brw_conditional_mod conditionalmod; - if (fpi->Opcode == OPCODE_MIN) - conditionalmod = BRW_CONDITIONAL_L; - else - conditionalmod = BRW_CONDITIONAL_GE; - - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - emit_minmax(conditionalmod, offset(dst, i), - offset(src0, i), offset(src1, i)); - } - } -} - -void -fs_visitor::emit_fp_sop(enum brw_conditional_mod conditional_mod, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1, - fs_reg one) -{ - for (int i = 0; i < 4; i++) { - if 
(fpi->DstReg.WriteMask & (1 << i)) { - fs_inst *inst; - - emit(CMP(reg_null_d, offset(src0, i), offset(src1, i), - conditional_mod)); - - inst = emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f)); - inst->predicate = BRW_PREDICATE_NORMAL; - } - } -} - -void -fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi, - fs_reg dst, fs_reg src) -{ - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) - emit(MOV(offset(dst, i), src)); - } -} - -void -fs_visitor::emit_fp_scalar_math(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src) -{ - fs_reg temp = vgrf(glsl_type::float_type); - emit_math(opcode, temp, src); - emit_fp_scalar_write(fpi, dst, temp); -} - -void -fs_visitor::emit_fragment_program_code() -{ - setup_fp_regs(); - - /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just - * be: - * - * sel.f0 dst 1.0 0.0 - * - * instead of - * - * mov dst 0.0 - * mov.f0 dst 1.0 - */ - fs_reg one = vgrf(glsl_type::float_type); - emit(MOV(one, fs_reg(1.0f))); - - for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) { - const struct prog_instruction *fpi = &prog->Instructions[insn]; - base_ir = fpi; - - fs_reg dst; - fs_reg src[3]; - - /* We always emit into a temporary destination register to avoid - * aliasing issues. 
- */ - dst = vgrf(glsl_type::vec4_type); - - for (int i = 0; i < 3; i++) - src[i] = get_fp_src_reg(&fpi->SrcReg[i]); - - switch (fpi->Opcode) { - case OPCODE_ABS: - src[0].abs = true; - src[0].negate = false; - emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); - break; - - case OPCODE_ADD: - emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]); - break; - - case OPCODE_CMP: - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - fs_inst *inst; - - emit(CMP(reg_null_f, offset(src[0], i), fs_reg(0.0f), - BRW_CONDITIONAL_L)); - - inst = emit(BRW_OPCODE_SEL, offset(dst, i), - offset(src[1], i), offset(src[2], i)); - inst->predicate = BRW_PREDICATE_NORMAL; - } - } - break; - - case OPCODE_COS: - emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]); - break; - - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_DPH: { - fs_reg mul = vgrf(glsl_type::float_type); - fs_reg acc = vgrf(glsl_type::float_type); - int count; - - switch (fpi->Opcode) { - case OPCODE_DP2: count = 2; break; - case OPCODE_DP3: count = 3; break; - case OPCODE_DP4: count = 4; break; - case OPCODE_DPH: count = 3; break; - default: unreachable("not reached"); - } - - emit(MUL(acc, offset(src[0], 0), offset(src[1], 0))); - for (int i = 1; i < count; i++) { - emit(MUL(mul, offset(src[0], i), offset(src[1], i))); - emit(ADD(acc, acc, mul)); - } - - if (fpi->Opcode == OPCODE_DPH) - emit(ADD(acc, acc, offset(src[1], 3))); - - emit_fp_scalar_write(fpi, dst, acc); - break; - } - - case OPCODE_DST: - if (fpi->DstReg.WriteMask & WRITEMASK_X) - emit(MOV(dst, fs_reg(1.0f))); - if (fpi->DstReg.WriteMask & WRITEMASK_Y) { - emit(MUL(offset(dst, 1), - offset(src[0], 1), offset(src[1], 1))); - } - if (fpi->DstReg.WriteMask & WRITEMASK_Z) - emit(MOV(offset(dst, 2), offset(src[0], 2))); - if (fpi->DstReg.WriteMask & WRITEMASK_W) - emit(MOV(offset(dst, 3), offset(src[1], 3))); - break; - - case OPCODE_EX2: - emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]); - break; - - 
case OPCODE_FLR: - emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]); - break; - - case OPCODE_FRC: - emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]); - break; - - case OPCODE_KIL: { - for (int i = 0; i < 4; i++) { - /* In most cases the argument to a KIL will be something like - * TEMP[0].wwww, so there's no point in checking whether .w is < 0 - * 4 times in a row. - */ - if (i > 0 && - GET_SWZ(fpi->SrcReg[0].Swizzle, i) == - GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) && - ((fpi->SrcReg[0].Negate >> i) & 1) == - ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) { - continue; - } - - - /* Emit an instruction that's predicated on the current - * undiscarded pixels, and updates just those pixels to be - * turned off. - */ - fs_inst *cmp = emit(CMP(reg_null_f, offset(src[0], i), - fs_reg(0.0f), BRW_CONDITIONAL_GE)); - cmp->predicate = BRW_PREDICATE_NORMAL; - cmp->flag_subreg = 1; - - if (devinfo->gen >= 6) - emit_discard_jump(); - } - break; - } - - case OPCODE_LG2: - emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]); - break; - - case OPCODE_LIT: - /* From the ARB_fragment_program spec: - * - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - * - * Note that we don't do the clamping to +/- 128. We didn't in - * brw_wm_emit.c either. 
- */ - if (fpi->DstReg.WriteMask & WRITEMASK_X) - emit(MOV(offset(dst, 0), fs_reg(1.0f))); - - if (fpi->DstReg.WriteMask & WRITEMASK_YZ) { - fs_inst *inst; - emit(CMP(reg_null_f, offset(src[0], 0), fs_reg(0.0f), - BRW_CONDITIONAL_LE)); - - if (fpi->DstReg.WriteMask & WRITEMASK_Y) { - emit(MOV(offset(dst, 1), offset(src[0], 0))); - inst = emit(MOV(offset(dst, 1), fs_reg(0.0f))); - inst->predicate = BRW_PREDICATE_NORMAL; - } - - if (fpi->DstReg.WriteMask & WRITEMASK_Z) { - emit_math(SHADER_OPCODE_POW, offset(dst, 2), - offset(src[0], 1), offset(src[0], 3)); - - inst = emit(MOV(offset(dst, 2), fs_reg(0.0f))); - inst->predicate = BRW_PREDICATE_NORMAL; - } - } - - if (fpi->DstReg.WriteMask & WRITEMASK_W) - emit(MOV(offset(dst, 3), fs_reg(1.0f))); - - break; - - case OPCODE_LRP: - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - fs_reg a = offset(src[0], i); - fs_reg y = offset(src[1], i); - fs_reg x = offset(src[2], i); - emit_lrp(offset(dst, i), x, y, a); - } - } - break; - - case OPCODE_MAD: - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - if (devinfo->gen >= 6) { - emit(MAD(offset(dst, i), offset(src[2], i), - offset(src[1], i), offset(src[0], i))); - } else { - fs_reg temp = vgrf(glsl_type::float_type); - emit(MUL(temp, offset(src[0], i), offset(src[1], i))); - emit(ADD(offset(dst, i), temp, offset(src[2], i))); - } - } - } - break; - - case OPCODE_MAX: - emit_fp_minmax(fpi, dst, src[0], src[1]); - break; - - case OPCODE_MOV: - emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); - break; - - case OPCODE_MIN: - emit_fp_minmax(fpi, dst, src[0], src[1]); - break; - - case OPCODE_MUL: - emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]); - break; - - case OPCODE_POW: { - fs_reg temp = vgrf(glsl_type::float_type); - emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]); - emit_fp_scalar_write(fpi, dst, temp); - break; - } - - case OPCODE_RCP: - emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]); - break; - - case 
OPCODE_RSQ: - emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]); - break; - - case OPCODE_SCS: - if (fpi->DstReg.WriteMask & WRITEMASK_X) { - emit_math(SHADER_OPCODE_COS, offset(dst, 0), - offset(src[0], 0)); - } - - if (fpi->DstReg.WriteMask & WRITEMASK_Y) { - emit_math(SHADER_OPCODE_SIN, offset(dst, 1), - offset(src[0], 1)); - } - break; - - case OPCODE_SGE: - emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one); - break; - - case OPCODE_SIN: - emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]); - break; - - case OPCODE_SLT: - emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one); - break; - - case OPCODE_SUB: { - fs_reg neg_src1 = src[1]; - neg_src1.negate = !src[1].negate; - - emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1); - break; - } - - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: { - ir_texture_opcode op; - fs_reg lod; - fs_reg dpdy; - fs_reg coordinate = src[0]; - fs_reg shadow_c; - fs_reg sample_index; - fs_reg texel_offset; /* No offsets; leave as BAD_FILE. 
*/ - - switch (fpi->Opcode) { - case OPCODE_TEX: - op = ir_tex; - break; - case OPCODE_TXP: { - op = ir_tex; - - coordinate = vgrf(glsl_type::vec3_type); - fs_reg invproj = vgrf(glsl_type::float_type); - emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3)); - for (int i = 0; i < 3; i++) { - emit(MUL(offset(coordinate, i), - offset(src[0], i), invproj)); - } - break; - } - case OPCODE_TXB: - op = ir_txb; - lod = offset(src[0], 3); - break; - default: - unreachable("not reached"); - } - - int coord_components; - switch (fpi->TexSrcTarget) { - case TEXTURE_1D_INDEX: - coord_components = 1; - break; - - case TEXTURE_2D_INDEX: - case TEXTURE_1D_ARRAY_INDEX: - case TEXTURE_RECT_INDEX: - case TEXTURE_EXTERNAL_INDEX: - coord_components = 2; - break; - - case TEXTURE_3D_INDEX: - case TEXTURE_2D_ARRAY_INDEX: - coord_components = 3; - break; - - case TEXTURE_CUBE_INDEX: { - coord_components = 3; - - fs_reg temp = vgrf(glsl_type::float_type); - fs_reg cubecoord = vgrf(glsl_type::vec3_type); - fs_reg abscoord = coordinate; - abscoord.negate = false; - abscoord.abs = true; - emit_minmax(BRW_CONDITIONAL_GE, temp, - offset(abscoord, 0), offset(abscoord, 1)); - emit_minmax(BRW_CONDITIONAL_GE, temp, - temp, offset(abscoord, 2)); - emit_math(SHADER_OPCODE_RCP, temp, temp); - for (int i = 0; i < 3; i++) { - emit(MUL(offset(cubecoord, i), - offset(coordinate, i), temp)); - } - - coordinate = cubecoord; - break; - } - - default: - unreachable("not reached"); - } - - if (fpi->TexShadow) - shadow_c = offset(coordinate, 2); - - emit_texture(op, glsl_type::vec4_type, coordinate, coord_components, - shadow_c, lod, dpdy, 0, sample_index, - reg_undef, /* offset */ - reg_undef, /* mcs */ - 0, /* gather component */ - false, /* is cube array */ - fpi->TexSrcTarget == TEXTURE_RECT_INDEX, - fpi->TexSrcUnit, fs_reg(fpi->TexSrcUnit), - fpi->TexSrcUnit); - dst = this->result; - - break; - } - - case OPCODE_SWZ: - /* Note that SWZ's extended swizzles are handled in the general - * get_src_reg() 
code. - */ - emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); - break; - - case OPCODE_XPD: - for (int i = 0; i < 3; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - int i1 = (i + 1) % 3; - int i2 = (i + 2) % 3; - - fs_reg temp = vgrf(glsl_type::float_type); - fs_reg neg_src1_1 = offset(src[1], i1); - neg_src1_1.negate = !neg_src1_1.negate; - emit(MUL(temp, offset(src[0], i2), neg_src1_1)); - emit(MUL(offset(dst, i), - offset(src[0], i1), offset(src[1], i2))); - emit(ADD(offset(dst, i), offset(dst, i), temp)); - } - } - break; - - case OPCODE_END: - break; - - default: - _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n", - _mesa_opcode_string(fpi->Opcode)); - } - - /* To handle saturates, we emit a MOV with a saturate bit, which - * optimization should fold into the preceding instructions when safe. - */ - if (_mesa_num_inst_dst_regs(fpi->Opcode) != 0) { - fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg); - - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - fs_inst *inst = emit(MOV(offset(real_dst, i), - offset(dst, i))); - inst->saturate = fpi->SaturateMode; - } - } - } - } - - /* Epilogue: - * - * Fragment depth has this strange convention of being the .z component of - * a vec4. emit_fb_write() wants to see a float value, instead. - */ - this->current_annotation = "result.depth write"; - if (frag_depth.file != BAD_FILE) { - fs_reg temp = vgrf(glsl_type::float_type); - emit(MOV(temp, offset(frag_depth, 2))); - frag_depth = temp; - } -} - -void -fs_visitor::setup_fp_regs() -{ - /* PROGRAM_TEMPORARY */ - int num_temp = prog->NumTemporaries; - fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp); - for (int i = 0; i < num_temp; i++) - fp_temp_regs[i] = vgrf(glsl_type::vec4_type); - - /* PROGRAM_STATE_VAR etc. 
*/ - if (dispatch_width == 8) { - for (unsigned p = 0; - p < prog->Parameters->NumParameters; p++) { - for (unsigned int i = 0; i < 4; i++) { - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[p][i]; - } - } - } - - fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX); - for (int i = 0; i < VARYING_SLOT_MAX; i++) { - if (prog->InputsRead & BITFIELD64_BIT(i)) { - this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d", - i); - - switch (i) { - case VARYING_SLOT_POS: - { - assert(stage == MESA_SHADER_FRAGMENT); - gl_fragment_program *fp = (gl_fragment_program*) prog; - fp_input_regs[i] = - *emit_fragcoord_interpolation(fp->PixelCenterInteger, - fp->OriginUpperLeft); - } - break; - case VARYING_SLOT_FACE: - fp_input_regs[i] = *emit_frontfacing_interpolation(); - break; - default: - fp_input_regs[i] = vgrf(glsl_type::vec4_type); - emit_general_interpolation(fp_input_regs[i], "fp_input", - glsl_type::vec4_type, - INTERP_QUALIFIER_NONE, - i, false, false); - - if (i == VARYING_SLOT_FOGC) { - emit(MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f))); - emit(MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f))); - emit(MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f))); - } - - break; - } - - this->current_annotation = NULL; - } - } -} - -fs_reg -fs_visitor::get_fp_dst_reg(const prog_dst_register *dst) -{ - assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - - switch (dst->File) { - case PROGRAM_TEMPORARY: - return fp_temp_regs[dst->Index]; - - case PROGRAM_OUTPUT: - if (dst->Index == FRAG_RESULT_DEPTH) { - if (frag_depth.file == BAD_FILE) - frag_depth = vgrf(glsl_type::vec4_type); - return frag_depth; - } else if (dst->Index == FRAG_RESULT_COLOR) { - if (outputs[0].file == BAD_FILE) { - outputs[0] = vgrf(glsl_type::vec4_type); - output_components[0] = 4; - - /* Tell emit_fb_writes() to smear fragment.color across all the - * color attachments. 
- */ - for (int i = 1; i < key->nr_color_regions; i++) { - outputs[i] = outputs[0]; - output_components[i] = output_components[0]; - } - } - return outputs[0]; - } else { - int output_index = dst->Index - FRAG_RESULT_DATA0; - if (outputs[output_index].file == BAD_FILE) { - outputs[output_index] = vgrf(glsl_type::vec4_type); - } - output_components[output_index] = 4; - return outputs[output_index]; - } - - case PROGRAM_UNDEFINED: - return fs_reg(); - - default: - _mesa_problem(ctx, "bad dst register file: %s\n", - _mesa_register_file_name((gl_register_file)dst->File)); - return vgrf(glsl_type::vec4_type); - } -} - -fs_reg -fs_visitor::get_fp_src_reg(const prog_src_register *src) -{ - struct gl_program_parameter_list *plist = prog->Parameters; - - fs_reg result; - - assert(!src->Abs); - - switch (src->File) { - case PROGRAM_UNDEFINED: - return fs_reg(); - case PROGRAM_TEMPORARY: - result = fp_temp_regs[src->Index]; - break; - - case PROGRAM_INPUT: - result = fp_input_regs[src->Index]; - break; - - case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - case PROGRAM_CONSTANT: - /* We actually want to look at the type in the Parameters list for this, - * because this lets us upload constant builtin uniforms, as actual - * constants. 
- */ - switch (plist->Parameters[src->Index].Type) { - case PROGRAM_CONSTANT: { - result = vgrf(glsl_type::vec4_type); - - for (int i = 0; i < 4; i++) { - emit(MOV(offset(result, i), - fs_reg(plist->ParameterValues[src->Index][i].f))); - } - break; - } - - case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - result = fs_reg(UNIFORM, src->Index * 4); - break; - - default: - _mesa_problem(ctx, "bad uniform src register file: %s\n", - _mesa_register_file_name((gl_register_file)src->File)); - return vgrf(glsl_type::vec4_type); - } - break; - - default: - _mesa_problem(ctx, "bad src register file: %s\n", - _mesa_register_file_name((gl_register_file)src->File)); - return vgrf(glsl_type::vec4_type); - } - - if (src->Swizzle != SWIZZLE_NOOP || src->Negate) { - fs_reg unswizzled = result; - result = vgrf(glsl_type::vec4_type); - for (int i = 0; i < 4; i++) { - bool negate = src->Negate & (1 << i); - /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ, - * but it costs us nothing to support it. - */ - int src_swiz = GET_SWZ(src->Swizzle, i); - if (src_swiz == SWIZZLE_ZERO) { - emit(MOV(offset(result, i), fs_reg(0.0f))); - } else if (src_swiz == SWIZZLE_ONE) { - emit(MOV(offset(result, i), - negate ? fs_reg(-1.0f) : fs_reg(1.0f))); - } else { - fs_reg src = offset(unswizzled, src_swiz); - if (negate) - src.negate = !src.negate; - emit(MOV(offset(result, i), src)); - } - } - } - - return result; -} From 78644ffc4d341deb431145108f0b2d377e59b61e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 May 2015 10:35:34 -0700 Subject: [PATCH 361/834] i965/fs: Remove the ir_visitor code Now that everything is running through NIR, this is all dead. 
Reviewed-by: Matt Turner Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 99 - src/mesa/drivers/dri/i965/brw_fs.h | 50 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2081 ------------------ 3 files changed, 2 insertions(+), 2228 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index cc76e4ad94e..28a19bd14ec 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1129,117 +1129,18 @@ fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type, this->width = width; } -fs_reg * -fs_visitor::variable_storage(ir_variable *var) -{ - return (fs_reg *)hash_table_find(this->variable_ht, var); -} - -void -import_uniforms_callback(const void *key, - void *data, - void *closure) -{ - struct hash_table *dst_ht = (struct hash_table *)closure; - const fs_reg *reg = (const fs_reg *)data; - - if (reg->file != UNIFORM) - return; - - hash_table_insert(dst_ht, data, key); -} - /* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch. * This brings in those uniform definitions */ void fs_visitor::import_uniforms(fs_visitor *v) { - hash_table_call_foreach(v->variable_ht, - import_uniforms_callback, - variable_ht); this->push_constant_loc = v->push_constant_loc; this->pull_constant_loc = v->pull_constant_loc; this->uniforms = v->uniforms; this->param_size = v->param_size; } -/* Our support for uniforms is piggy-backed on the struct - * gl_fragment_program, because that's where the values actually - * get stored, rather than in some global gl_shader_program uniform - * store. - */ -void -fs_visitor::setup_uniform_values(ir_variable *ir) -{ - int namelen = strlen(ir->name); - - /* The data for our (non-builtin) uniforms is stored in a series of - * gl_uniform_driver_storage structs for each subcomponent that - * glGetUniformLocation() could name. 
We know it's been set up in the same - * order we'd walk the type, so walk the list of storage and find anything - * with our name, or the prefix of a component that starts with our name. - */ - unsigned params_before = uniforms; - for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) { - struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; - - if (strncmp(ir->name, storage->name, namelen) != 0 || - (storage->name[namelen] != 0 && - storage->name[namelen] != '.' && - storage->name[namelen] != '[')) { - continue; - } - - unsigned slots = storage->type->component_slots(); - if (storage->array_elements) - slots *= storage->array_elements; - - for (unsigned i = 0; i < slots; i++) { - stage_prog_data->param[uniforms++] = &storage->storage[i]; - } - } - - /* Make sure we actually initialized the right amount of stuff here. */ - assert(params_before + ir->type->component_slots() == uniforms); - (void)params_before; -} - - -/* Our support for builtin uniforms is even scarier than non-builtin. - * It sits on top of the PROG_STATE_VAR parameters that are - * automatically updated from GL context state. - */ -void -fs_visitor::setup_builtin_uniform_values(ir_variable *ir) -{ - const ir_state_slot *const slots = ir->get_state_slots(); - assert(slots != NULL); - - for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { - /* This state reference has already been setup by ir_to_mesa, but we'll - * get the same index back here. - */ - int index = _mesa_add_state_reference(this->prog->Parameters, - (gl_state_index *)slots[i].tokens); - - /* Add each of the unique swizzles of the element as a parameter. - * This'll end up matching the expected layout of the - * array/matrix/structure we're trying to fill in. 
- */ - int last_swiz = -1; - for (unsigned int j = 0; j < 4; j++) { - int swiz = GET_SWZ(slots[i].swizzle, j); - if (swiz == last_swiz) - break; - last_swiz = swiz; - - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[index][swiz]; - } - } -} - fs_reg * fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, bool origin_upper_left) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 63414eb2831..3bdf0a25f37 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -66,7 +66,7 @@ namespace brw { * * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR. */ -class fs_visitor : public backend_shader, public ir_visitor +class fs_visitor : public backend_shader { public: const fs_reg reg_null_f; @@ -84,33 +84,12 @@ public: ~fs_visitor(); - fs_reg *variable_storage(ir_variable *var); fs_reg vgrf(const glsl_type *const type); fs_reg vgrf(int num_components); void import_uniforms(fs_visitor *v); void setup_uniform_clipplane_values(); void compute_clip_distance(); - void visit(ir_variable *ir); - void visit(ir_assignment *ir); - void visit(ir_dereference_variable *ir); - void visit(ir_dereference_record *ir); - void visit(ir_dereference_array *ir); - void visit(ir_expression *ir); - void visit(ir_texture *ir); - void visit(ir_if *ir); - void visit(ir_constant *ir); - void visit(ir_swizzle *ir); - void visit(ir_return *ir); - void visit(ir_loop *ir); - void visit(ir_loop_jump *ir); - void visit(ir_discard *ir); - void visit(ir_call *ir); - void visit(ir_function *ir); - void visit(ir_function_signature *ir); - void visit(ir_emit_vertex *); - void visit(ir_end_primitive *); - uint32_t gather_channel(int orig_chan, uint32_t sampler); void swizzle_result(ir_texture_opcode op, int dest_components, fs_reg orig_val, uint32_t sampler); @@ -308,25 +287,15 @@ public: fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); fs_inst 
*emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y, const fs_reg &a); - void emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst, - const fs_reg &src0, const fs_reg &src1); void emit_discard_jump(); /** Copy any live channel from \p src to the first channel of \p dst. */ void emit_uniformize(const fs_reg &dst, const fs_reg &src); - bool try_emit_b2f_of_comparison(ir_expression *ir); - bool try_emit_saturate(ir_expression *ir); - bool try_emit_line(ir_expression *ir); - bool try_emit_mad(ir_expression *ir); bool try_replace_with_sel(); - bool try_opt_frontfacing_ternary(ir_if *ir); bool opt_peephole_sel(); bool opt_peephole_predicated_break(); bool opt_saturate_propagation(); bool opt_cmod_propagation(); bool opt_zero_samples(); - void emit_bool_to_cond_code(ir_rvalue *condition); - void emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3]); - void emit_if_gen6(ir_if *ir); void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg, uint32_t spill_offset, int count); void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg, @@ -377,23 +346,11 @@ public: void emit_untyped_surface_read(unsigned surf_index, fs_reg dst, fs_reg offset); - void emit_interpolate_expression(ir_expression *ir); - - bool try_rewrite_rhs_to_dst(ir_assignment *ir, - fs_reg dst, - fs_reg src, - fs_inst *pre_rhs_inst, - fs_inst *last_rhs_inst); - void emit_assignment_writes(fs_reg &l, fs_reg &r, - const glsl_type *type, bool predicated); void resolve_ud_negate(fs_reg *reg); - void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg); fs_reg get_timestamp(fs_inst **out_mov); struct brw_reg interp_reg(int location, int channel); - void setup_uniform_values(ir_variable *ir); - void setup_builtin_uniform_values(ir_variable *ir); int implied_mrf_writes(fs_inst *inst); virtual void dump_instructions(); @@ -401,8 +358,6 @@ public: void dump_instruction(backend_instruction *inst); void dump_instruction(backend_instruction *inst, FILE *file); - void 
visit_atomic_counter_intrinsic(ir_call *ir); - const void *const key; const struct brw_sampler_prog_key_data *key_tex; @@ -438,7 +393,6 @@ public: */ int *push_constant_loc; - struct hash_table *variable_ht; fs_reg frag_depth; fs_reg sample_mask; fs_reg outputs[VARYING_SLOT_MAX]; @@ -465,7 +419,7 @@ public: bool simd16_unsupported; char *no16_msg; - /* Result of last visit() method. */ + /* Result of last visit() method. Still used by emit_texture() */ fs_reg result; /** Register numbers for thread payload fields. */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 7553c3542fd..e336b73392c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -76,219 +76,6 @@ fs_visitor::emit_vs_system_value(int location) return reg; } -void -fs_visitor::visit(ir_variable *ir) -{ - fs_reg *reg = NULL; - - if (variable_storage(ir)) - return; - - if (ir->data.mode == ir_var_shader_in) { - assert(ir->data.location != -1); - if (stage == MESA_SHADER_VERTEX) { - reg = new(this->mem_ctx) - fs_reg(ATTR, ir->data.location, - brw_type_for_base_type(ir->type->get_scalar_type())); - } else if (ir->data.location == VARYING_SLOT_POS) { - reg = emit_fragcoord_interpolation(ir->data.pixel_center_integer, - ir->data.origin_upper_left); - } else if (ir->data.location == VARYING_SLOT_FACE) { - reg = emit_frontfacing_interpolation(); - } else { - reg = new(this->mem_ctx) fs_reg(vgrf(ir->type)); - emit_general_interpolation(*reg, ir->name, ir->type, - (glsl_interp_qualifier) ir->data.interpolation, - ir->data.location, ir->data.centroid, - ir->data.sample); - } - assert(reg); - hash_table_insert(this->variable_ht, reg, ir); - return; - } else if (ir->data.mode == ir_var_shader_out) { - reg = new(this->mem_ctx) fs_reg(vgrf(ir->type)); - - if (stage == MESA_SHADER_VERTEX) { - int vector_elements = - ir->type->is_array() ? 
ir->type->fields.array->vector_elements - : ir->type->vector_elements; - - for (int i = 0; i < (type_size(ir->type) + 3) / 4; i++) { - int output = ir->data.location + i; - this->outputs[output] = *reg; - this->outputs[output].reg_offset = i * 4; - this->output_components[output] = vector_elements; - } - - } else if (ir->data.index > 0) { - assert(ir->data.location == FRAG_RESULT_DATA0); - assert(ir->data.index == 1); - this->dual_src_output = *reg; - this->do_dual_src = true; - } else if (ir->data.location == FRAG_RESULT_COLOR) { - /* Writing gl_FragColor outputs to all color regions. */ - assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) { - this->outputs[i] = *reg; - this->output_components[i] = 4; - } - } else if (ir->data.location == FRAG_RESULT_DEPTH) { - this->frag_depth = *reg; - } else if (ir->data.location == FRAG_RESULT_SAMPLE_MASK) { - this->sample_mask = *reg; - } else { - /* gl_FragData or a user-defined FS output */ - assert(ir->data.location >= FRAG_RESULT_DATA0 && - ir->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS); - - int vector_elements = - ir->type->is_array() ? ir->type->fields.array->vector_elements - : ir->type->vector_elements; - - /* General color output. */ - for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) { - int output = ir->data.location - FRAG_RESULT_DATA0 + i; - this->outputs[output] = offset(*reg, vector_elements * i); - this->output_components[output] = vector_elements; - } - } - } else if (ir->data.mode == ir_var_uniform) { - int param_index = uniforms; - - /* Thanks to the lower_ubo_reference pass, we will see only - * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO - * variables, so no need for them to be in variable_ht. - * - * Some uniforms, such as samplers and atomic counters, have no actual - * storage, so we should ignore them. 
- */ - if (ir->is_in_uniform_block() || type_size(ir->type) == 0) - return; - - if (dispatch_width == 16) { - if (!variable_storage(ir)) { - fail("Failed to find uniform '%s' in SIMD16\n", ir->name); - } - return; - } - - param_size[param_index] = type_size(ir->type); - if (!strncmp(ir->name, "gl_", 3)) { - setup_builtin_uniform_values(ir); - } else { - setup_uniform_values(ir); - } - - reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); - reg->type = brw_type_for_base_type(ir->type); - - } else if (ir->data.mode == ir_var_system_value) { - switch (ir->data.location) { - case SYSTEM_VALUE_BASE_VERTEX: - case SYSTEM_VALUE_VERTEX_ID: - case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: - case SYSTEM_VALUE_INSTANCE_ID: - reg = emit_vs_system_value(ir->data.location); - break; - case SYSTEM_VALUE_SAMPLE_POS: - reg = emit_samplepos_setup(); - break; - case SYSTEM_VALUE_SAMPLE_ID: - reg = emit_sampleid_setup(); - break; - case SYSTEM_VALUE_SAMPLE_MASK_IN: - assert(devinfo->gen >= 7); - reg = new(mem_ctx) - fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0), - BRW_REGISTER_TYPE_D)); - break; - } - } - - if (!reg) - reg = new(this->mem_ctx) fs_reg(vgrf(ir->type)); - - hash_table_insert(this->variable_ht, reg, ir); -} - -void -fs_visitor::visit(ir_dereference_variable *ir) -{ - fs_reg *reg = variable_storage(ir->var); - - if (!reg) { - fail("Failed to find variable storage for %s\n", ir->var->name); - this->result = fs_reg(reg_null_d); - return; - } - this->result = *reg; -} - -void -fs_visitor::visit(ir_dereference_record *ir) -{ - const glsl_type *struct_type = ir->record->type; - - ir->record->accept(this); - - unsigned int off = 0; - for (unsigned int i = 0; i < struct_type->length; i++) { - if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) - break; - off += type_size(struct_type->fields.structure[i].type); - } - this->result = offset(this->result, off); - this->result.type = brw_type_for_base_type(ir->type); -} - -void 
-fs_visitor::visit(ir_dereference_array *ir) -{ - ir_constant *constant_index; - fs_reg src; - int element_size = type_size(ir->type); - - constant_index = ir->array_index->as_constant(); - - ir->array->accept(this); - src = this->result; - src.type = brw_type_for_base_type(ir->type); - - if (constant_index) { - if (src.file == ATTR) { - /* Attribute arrays get loaded as one vec4 per element. In that case - * offset the source register. - */ - src.reg += constant_index->value.i[0]; - } else { - assert(src.file == UNIFORM || src.file == GRF || src.file == HW_REG); - src = offset(src, constant_index->value.i[0] * element_size); - } - } else { - /* Variable index array dereference. We attach the variable index - * component to the reg as a pointer to a register containing the - * offset. Currently only uniform arrays are supported in this patch, - * and that reladdr pointer is resolved by - * move_uniform_array_access_to_pull_constants(). All other array types - * are lowered by lower_variable_index_to_cond_assign(). 
- */ - ir->array_index->accept(this); - - fs_reg index_reg; - index_reg = vgrf(glsl_type::int_type); - emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size)); - - if (src.reladdr) { - emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg); - } - - src.reladdr = ralloc(mem_ctx, fs_reg); - memcpy(src.reladdr, &index_reg, sizeof(index_reg)); - } - this->result = src; -} - fs_inst * fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y, const fs_reg &a) @@ -315,26 +102,6 @@ fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y, } } -void -fs_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst, - const fs_reg &src0, const fs_reg &src1) -{ - assert(conditionalmod == BRW_CONDITIONAL_GE || - conditionalmod == BRW_CONDITIONAL_L); - - fs_inst *inst; - - if (devinfo->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->conditional_mod = conditionalmod; - } else { - emit(CMP(reg_null_d, src0, src1, conditionalmod)); - - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->predicate = BRW_PREDICATE_NORMAL; - } -} - void fs_visitor::emit_uniformize(const fs_reg &dst, const fs_reg &src) { @@ -347,1064 +114,6 @@ fs_visitor::emit_uniformize(const fs_reg &dst, const fs_reg &src) ->force_writemask_all = true; } -bool -fs_visitor::try_emit_saturate(ir_expression *ir) -{ - if (ir->operation != ir_unop_saturate) - return false; - - ir_rvalue *sat_val = ir->operands[0]; - - fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail(); - - sat_val->accept(this); - fs_reg src = this->result; - - fs_inst *last_inst = (fs_inst *) this->instructions.get_tail(); - - /* If the last instruction from our accept() generated our - * src, just set the saturate flag instead of emmitting a separate mov. 
- */ - fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); - if (modify && modify->regs_written == modify->dst.width / 8 && - modify->can_do_saturate()) { - modify->saturate = true; - this->result = src; - return true; - } - - return false; -} - -bool -fs_visitor::try_emit_line(ir_expression *ir) -{ - /* LINE's src0 must be of type float. */ - if (ir->type != glsl_type::float_type) - return false; - - ir_rvalue *nonmul = ir->operands[1]; - ir_expression *mul = ir->operands[0]->as_expression(); - - if (!mul || mul->operation != ir_binop_mul) { - nonmul = ir->operands[0]; - mul = ir->operands[1]->as_expression(); - - if (!mul || mul->operation != ir_binop_mul) - return false; - } - - ir_constant *const_add = nonmul->as_constant(); - if (!const_add) - return false; - - int add_operand_vf = brw_float_to_vf(const_add->value.f[0]); - if (add_operand_vf == -1) - return false; - - ir_rvalue *non_const_mul = mul->operands[1]; - ir_constant *const_mul = mul->operands[0]->as_constant(); - if (!const_mul) { - const_mul = mul->operands[1]->as_constant(); - - if (!const_mul) - return false; - - non_const_mul = mul->operands[0]; - } - - int mul_operand_vf = brw_float_to_vf(const_mul->value.f[0]); - if (mul_operand_vf == -1) - return false; - - non_const_mul->accept(this); - fs_reg src1 = this->result; - - fs_reg src0 = vgrf(ir->type); - emit(BRW_OPCODE_MOV, src0, - fs_reg((uint8_t)mul_operand_vf, 0, 0, (uint8_t)add_operand_vf)); - - this->result = vgrf(ir->type); - emit(BRW_OPCODE_LINE, this->result, src0, src1); - return true; -} - -bool -fs_visitor::try_emit_mad(ir_expression *ir) -{ - /* 3-src instructions were introduced in gen6. */ - if (devinfo->gen < 6) - return false; - - /* MAD can only handle floating-point data. 
*/ - if (ir->type != glsl_type::float_type) - return false; - - ir_rvalue *nonmul; - ir_expression *mul; - bool mul_negate, mul_abs; - - for (int i = 0; i < 2; i++) { - mul_negate = false; - mul_abs = false; - - mul = ir->operands[i]->as_expression(); - nonmul = ir->operands[1 - i]; - - if (mul && mul->operation == ir_unop_abs) { - mul = mul->operands[0]->as_expression(); - mul_abs = true; - } else if (mul && mul->operation == ir_unop_neg) { - mul = mul->operands[0]->as_expression(); - mul_negate = true; - } - - if (mul && mul->operation == ir_binop_mul) - break; - } - - if (!mul || mul->operation != ir_binop_mul) - return false; - - nonmul->accept(this); - fs_reg src0 = this->result; - - mul->operands[0]->accept(this); - fs_reg src1 = this->result; - src1.negate ^= mul_negate; - src1.abs = mul_abs; - if (mul_abs) - src1.negate = false; - - mul->operands[1]->accept(this); - fs_reg src2 = this->result; - src2.abs = mul_abs; - if (mul_abs) - src2.negate = false; - - this->result = vgrf(ir->type); - emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); - - return true; -} - -bool -fs_visitor::try_emit_b2f_of_comparison(ir_expression *ir) -{ - /* On platforms that do not natively generate 0u and ~0u for Boolean - * results, b2f expressions that look like - * - * f = b2f(expr cmp 0) - * - * will generate better code by pretending the expression is - * - * f = ir_triop_csel(0.0, 1.0, expr cmp 0) - * - * This is because the last instruction of "expr" can generate the - * condition code for the "cmp 0". This avoids having to do the "-(b & 1)" - * trick to generate 0u or ~0u for the Boolean result. 
This means code like - * - * mov(16) g16<1>F 1F - * mul.ge.f0(16) null g6<8,8,1>F g14<8,8,1>F - * (+f0) sel(16) m6<1>F g16<8,8,1>F 0F - * - * will be generated instead of - * - * mul(16) g2<1>F g12<8,8,1>F g4<8,8,1>F - * cmp.ge.f0(16) g2<1>D g4<8,8,1>F 0F - * and(16) g4<1>D g2<8,8,1>D 1D - * and(16) m6<1>D -g4<8,8,1>D 0x3f800000UD - * - * When the comparison is != 0.0 using the knowledge that the false case - * already results in zero would allow better code generation by possibly - * avoiding a load-immediate instruction. - */ - ir_expression *cmp = ir->operands[0]->as_expression(); - if (cmp == NULL) - return false; - - if (cmp->operation == ir_binop_nequal) { - for (unsigned i = 0; i < 2; i++) { - ir_constant *c = cmp->operands[i]->as_constant(); - if (c == NULL || !c->is_zero()) - continue; - - ir_expression *expr = cmp->operands[i ^ 1]->as_expression(); - if (expr != NULL) { - fs_reg op[2]; - - for (unsigned j = 0; j < 2; j++) { - cmp->operands[j]->accept(this); - op[j] = this->result; - - resolve_ud_negate(&op[j]); - } - - emit_bool_to_cond_code_of_reg(cmp, op); - - /* In this case we know when the condition is true, op[i ^ 1] - * contains zero. Invert the predicate, use op[i ^ 1] as src0, - * and immediate 1.0f as src1. - */ - this->result = vgrf(ir->type); - op[i ^ 1].type = BRW_REGISTER_TYPE_F; - - fs_inst *inst = emit(SEL(this->result, op[i ^ 1], fs_reg(1.0f))); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = true; - return true; - } - } - } - - emit_bool_to_cond_code(cmp); - - fs_reg temp = vgrf(ir->type); - emit(MOV(temp, fs_reg(1.0f))); - - this->result = vgrf(ir->type); - fs_inst *inst = emit(SEL(this->result, temp, fs_reg(0.0f))); - inst->predicate = BRW_PREDICATE_NORMAL; - - return true; -} - -static int -pack_pixel_offset(float x) -{ - /* Clamp upper end of the range to +7/16. See explanation in non-constant - * offset case below. 
*/ - int n = MIN2((int)(x * 16), 7); - return n & 0xf; -} - -void -fs_visitor::emit_interpolate_expression(ir_expression *ir) -{ - /* in SIMD16 mode, the pixel interpolator returns coords interleaved - * 8 channels at a time, same as the barycentric coords presented in - * the FS payload. this requires a bit of extra work to support. - */ - no16("interpolate_at_* not yet supported in SIMD16 mode."); - - assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - - ir_dereference * deref = ir->operands[0]->as_dereference(); - ir_swizzle * swiz = NULL; - if (!deref) { - /* the api does not allow a swizzle here, but the varying packing code - * may have pushed one into here. - */ - swiz = ir->operands[0]->as_swizzle(); - assert(swiz); - deref = swiz->val->as_dereference(); - } - assert(deref); - ir_variable * var = deref->variable_referenced(); - assert(var); - - /* 1. collect interpolation factors */ - - fs_reg dst_xy = vgrf(glsl_type::get_instance(ir->type->base_type, 2, 1)); - - /* for most messages, we need one reg of ignored data; the hardware requires mlen==1 - * even when there is no payload. in the per-slot offset case, we'll replace this with - * the proper source data. 
*/ - fs_reg src = vgrf(glsl_type::float_type); - int mlen = 1; /* one reg unless overriden */ - int reg_width = dispatch_width / 8; - fs_inst *inst; - - switch (ir->operation) { - case ir_unop_interpolate_at_centroid: - inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u)); - break; - - case ir_binop_interpolate_at_sample: { - ir_constant *sample_num = ir->operands[1]->as_constant(); - assert(sample_num || !"nonconstant sample number should have been lowered."); - - unsigned msg_data = sample_num->value.i[0] << 4; - inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, fs_reg(msg_data)); - break; - } - - case ir_binop_interpolate_at_offset: { - ir_constant *const_offset = ir->operands[1]->as_constant(); - if (const_offset) { - unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) | - (pack_pixel_offset(const_offset->value.f[1]) << 4); - inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, - fs_reg(msg_data)); - } else { - /* pack the operands: hw wants offsets as 4 bit signed ints */ - ir->operands[1]->accept(this); - src = vgrf(glsl_type::ivec2_type); - fs_reg src2 = src; - for (int i = 0; i < 2; i++) { - fs_reg temp = vgrf(glsl_type::float_type); - emit(MUL(temp, this->result, fs_reg(16.0f))); - emit(MOV(src2, temp)); /* float to int */ - - /* Clamp the upper end of the range to +7/16. ARB_gpu_shader5 requires - * that we support a maximum offset of +0.5, which isn't representable - * in a S0.4 value -- if we didn't clamp it, we'd end up with -8/16, - * which is the opposite of what the shader author wanted. 
- * - * This is legal due to ARB_gpu_shader5's quantization rules: - * - * "Not all values of may be supported; x and y offsets may - * be rounded to fixed-point values with the number of fraction bits - * given by the implementation-dependent constant - * FRAGMENT_INTERPOLATION_OFFSET_BITS" - */ - - fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7)); - inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */ - - src2 = offset(src2, 1); - this->result = offset(this->result, 1); - } - - mlen = 2 * reg_width; - inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, - fs_reg(0u)); - } - break; - } - - default: - unreachable("not reached"); - } - - inst->mlen = mlen; - inst->regs_written = 2 * reg_width; /* 2 floats per slot returned */ - inst->pi_noperspective = var->determine_interpolation_mode(key->flat_shade) == - INTERP_QUALIFIER_NOPERSPECTIVE; - - /* 2. emit linterp */ - - fs_reg res = vgrf(ir->type); - this->result = res; - - for (int i = 0; i < ir->type->vector_elements; i++) { - int ch = swiz ? 
((*(int *)&swiz->mask) >> 2*i) & 3 : i; - emit(FS_OPCODE_LINTERP, res, dst_xy, - fs_reg(interp_reg(var->data.location, ch))); - res = offset(res, 1); - } -} - -void -fs_visitor::visit(ir_expression *ir) -{ - unsigned int operand; - fs_reg op[3], temp; - fs_inst *inst; - struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key; - - assert(ir->get_num_operands() <= 3); - - if (try_emit_saturate(ir)) - return; - - /* Deal with the real oddball stuff first */ - switch (ir->operation) { - case ir_binop_add: - if (devinfo->gen <= 5 && try_emit_line(ir)) - return; - if (try_emit_mad(ir)) - return; - break; - - case ir_triop_csel: - ir->operands[1]->accept(this); - op[1] = this->result; - ir->operands[2]->accept(this); - op[2] = this->result; - - emit_bool_to_cond_code(ir->operands[0]); - - this->result = vgrf(ir->type); - inst = emit(SEL(this->result, op[1], op[2])); - inst->predicate = BRW_PREDICATE_NORMAL; - return; - - case ir_unop_b2f: - if (devinfo->gen <= 5 && try_emit_b2f_of_comparison(ir)) - return; - break; - - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - emit_interpolate_expression(ir); - return; - - default: - break; - } - - for (operand = 0; operand < ir->get_num_operands(); operand++) { - ir->operands[operand]->accept(this); - if (this->result.file == BAD_FILE) { - fail("Failed to get tree for expression operand:\n"); - ir->operands[operand]->fprint(stderr); - fprintf(stderr, "\n"); - } - assert(this->result.file == GRF || - this->result.file == UNIFORM || this->result.file == ATTR); - op[operand] = this->result; - - /* Matrix expression operands should have been broken down to vector - * operations already. - */ - assert(!ir->operands[operand]->type->is_matrix()); - /* And then those vector operands should have been broken down to scalar. - */ - assert(!ir->operands[operand]->type->is_vector()); - } - - /* Storage for our result. 
If our result goes into an assignment, it will - * just get copy-propagated out, so no worries. - */ - this->result = vgrf(ir->type); - - switch (ir->operation) { - case ir_unop_logic_not: - emit(NOT(this->result, op[0])); - break; - case ir_unop_neg: - op[0].negate = !op[0].negate; - emit(MOV(this->result, op[0])); - break; - case ir_unop_abs: - op[0].abs = true; - op[0].negate = false; - emit(MOV(this->result, op[0])); - break; - case ir_unop_sign: - if (ir->type->is_float()) { - /* AND(val, 0x80000000) gives the sign bit. - * - * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not - * zero. - */ - emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); - - op[0].type = BRW_REGISTER_TYPE_UD; - this->result.type = BRW_REGISTER_TYPE_UD; - emit(AND(this->result, op[0], fs_reg(0x80000000u))); - - inst = emit(OR(this->result, this->result, fs_reg(0x3f800000u))); - inst->predicate = BRW_PREDICATE_NORMAL; - - this->result.type = BRW_REGISTER_TYPE_F; - } else { - /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1). - * -> non-negative val generates 0x00000000. - * Predicated OR sets 1 if val is positive. - */ - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G)); - - emit(ASR(this->result, op[0], fs_reg(31))); - - inst = emit(OR(this->result, this->result, fs_reg(1))); - inst->predicate = BRW_PREDICATE_NORMAL; - } - break; - case ir_unop_rcp: - emit_math(SHADER_OPCODE_RCP, this->result, op[0]); - break; - - case ir_unop_exp2: - emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); - break; - case ir_unop_log2: - emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); - break; - case ir_unop_exp: - case ir_unop_log: - unreachable("not reached: should be handled by ir_explog_to_explog2"); - case ir_unop_sin: - emit_math(SHADER_OPCODE_SIN, this->result, op[0]); - break; - case ir_unop_cos: - emit_math(SHADER_OPCODE_COS, this->result, op[0]); - break; - - case ir_unop_dFdx: - /* Select one of the two opcodes based on the glHint value. 
*/ - if (fs_key->high_quality_derivatives) - emit(FS_OPCODE_DDX_FINE, this->result, op[0]); - else - emit(FS_OPCODE_DDX_COARSE, this->result, op[0]); - break; - - case ir_unop_dFdx_coarse: - emit(FS_OPCODE_DDX_COARSE, this->result, op[0]); - break; - - case ir_unop_dFdx_fine: - emit(FS_OPCODE_DDX_FINE, this->result, op[0]); - break; - - case ir_unop_dFdy: - /* Select one of the two opcodes based on the glHint value. */ - if (fs_key->high_quality_derivatives) - emit(FS_OPCODE_DDY_FINE, result, op[0], fs_reg(fs_key->render_to_fbo)); - else - emit(FS_OPCODE_DDY_COARSE, result, op[0], fs_reg(fs_key->render_to_fbo)); - break; - - case ir_unop_dFdy_coarse: - emit(FS_OPCODE_DDY_COARSE, result, op[0], fs_reg(fs_key->render_to_fbo)); - break; - - case ir_unop_dFdy_fine: - emit(FS_OPCODE_DDY_FINE, result, op[0], fs_reg(fs_key->render_to_fbo)); - break; - - case ir_binop_add: - emit(ADD(this->result, op[0], op[1])); - break; - case ir_binop_sub: - unreachable("not reached: should be handled by ir_sub_to_add_neg"); - - case ir_binop_mul: - emit(MUL(this->result, op[0], op[1])); - break; - case ir_binop_imul_high: { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - this->result.type); - - fs_inst *mul = emit(MUL(acc, op[0], op[1])); - emit(MACH(this->result, op[0], op[1])); - - /* Until Gen8, integer multiplies read 32-bits from one source, and - * 16-bits from the other, and relying on the MACH instruction to - * generate the high bits of the result. - * - * On Gen8, the multiply instruction does a full 32x32-bit multiply, - * but in order to do a 64x64-bit multiply we have to simulate the - * previous behavior and then use a MACH instruction. - * - * FINISHME: Don't use source modifiers on src1. 
- */ - if (devinfo->gen >= 8) { - assert(mul->src[1].type == BRW_REGISTER_TYPE_D || - mul->src[1].type == BRW_REGISTER_TYPE_UD); - if (mul->src[1].type == BRW_REGISTER_TYPE_D) { - mul->src[1].type = BRW_REGISTER_TYPE_W; - mul->src[1].stride = 2; - } else { - mul->src[1].type = BRW_REGISTER_TYPE_UW; - mul->src[1].stride = 2; - } - } - - break; - } - case ir_binop_div: - /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ - assert(ir->type->is_integer()); - emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); - break; - case ir_binop_carry: { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - BRW_REGISTER_TYPE_UD); - - emit(ADDC(reg_null_ud, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); - break; - } - case ir_binop_borrow: { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - BRW_REGISTER_TYPE_UD); - - emit(SUBB(reg_null_ud, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); - break; - } - case ir_binop_mod: - /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. 
*/ - assert(ir->type->is_integer()); - emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]); - break; - - case ir_binop_less: - case ir_binop_greater: - case ir_binop_lequal: - case ir_binop_gequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - if (devinfo->gen <= 5) { - resolve_bool_comparison(ir->operands[0], &op[0]); - resolve_bool_comparison(ir->operands[1], &op[1]); - } - - emit(CMP(this->result, op[0], op[1], - brw_conditional_for_comparison(ir->operation))); - break; - - case ir_binop_logic_xor: - emit(XOR(this->result, op[0], op[1])); - break; - - case ir_binop_logic_or: - emit(OR(this->result, op[0], op[1])); - break; - - case ir_binop_logic_and: - emit(AND(this->result, op[0], op[1])); - break; - - case ir_binop_dot: - case ir_unop_any: - unreachable("not reached: should be handled by brw_fs_channel_expressions"); - - case ir_unop_noise: - unreachable("not reached: should be handled by lower_noise"); - - case ir_quadop_vector: - unreachable("not reached: should be handled by lower_quadop_vector"); - - case ir_binop_vector_extract: - unreachable("not reached: should be handled by lower_vec_index_to_cond_assign()"); - - case ir_triop_vector_insert: - unreachable("not reached: should be handled by lower_vector_insert()"); - - case ir_binop_ldexp: - unreachable("not reached: should be handled by ldexp_to_arith()"); - - case ir_unop_sqrt: - emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); - break; - - case ir_unop_rsq: - emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); - break; - - case ir_unop_bitcast_i2f: - case ir_unop_bitcast_u2f: - op[0].type = BRW_REGISTER_TYPE_F; - this->result = op[0]; - break; - case ir_unop_i2u: - case ir_unop_bitcast_f2u: - op[0].type = BRW_REGISTER_TYPE_UD; - this->result = op[0]; - break; - case ir_unop_u2i: - case ir_unop_bitcast_f2i: - op[0].type = BRW_REGISTER_TYPE_D; - this->result = op[0]; - break; - case ir_unop_i2f: - case ir_unop_u2f: - case 
ir_unop_f2i: - case ir_unop_f2u: - emit(MOV(this->result, op[0])); - break; - - case ir_unop_b2i: - emit(AND(this->result, op[0], fs_reg(1))); - break; - case ir_unop_b2f: - if (devinfo->gen <= 5) { - resolve_bool_comparison(ir->operands[0], &op[0]); - } - op[0].type = BRW_REGISTER_TYPE_D; - this->result.type = BRW_REGISTER_TYPE_D; - emit(AND(this->result, op[0], fs_reg(0x3f800000u))); - this->result.type = BRW_REGISTER_TYPE_F; - break; - - case ir_unop_f2b: - emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); - break; - case ir_unop_i2b: - emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - break; - - case ir_unop_trunc: - emit(RNDZ(this->result, op[0])); - break; - case ir_unop_ceil: { - fs_reg tmp = vgrf(ir->type); - op[0].negate = !op[0].negate; - emit(RNDD(tmp, op[0])); - tmp.negate = true; - emit(MOV(this->result, tmp)); - } - break; - case ir_unop_floor: - emit(RNDD(this->result, op[0])); - break; - case ir_unop_fract: - emit(FRC(this->result, op[0])); - break; - case ir_unop_round_even: - emit(RNDE(this->result, op[0])); - break; - - case ir_binop_min: - case ir_binop_max: - resolve_ud_negate(&op[0]); - resolve_ud_negate(&op[1]); - emit_minmax(ir->operation == ir_binop_min ? 
- BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE, - this->result, op[0], op[1]); - break; - case ir_unop_pack_snorm_2x16: - case ir_unop_pack_snorm_4x8: - case ir_unop_pack_unorm_2x16: - case ir_unop_pack_unorm_4x8: - case ir_unop_unpack_snorm_2x16: - case ir_unop_unpack_snorm_4x8: - case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_unorm_4x8: - case ir_unop_unpack_half_2x16: - case ir_unop_pack_half_2x16: - unreachable("not reached: should be handled by lower_packing_builtins"); - case ir_unop_unpack_half_2x16_split_x: - emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]); - break; - case ir_unop_unpack_half_2x16_split_y: - emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]); - break; - case ir_binop_pow: - emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); - break; - - case ir_unop_bitfield_reverse: - emit(BFREV(this->result, op[0])); - break; - case ir_unop_bit_count: - emit(CBIT(this->result, op[0])); - break; - case ir_unop_find_msb: - temp = vgrf(glsl_type::uint_type); - emit(FBH(temp, op[0])); - - /* FBH counts from the MSB side, while GLSL's findMSB() wants the count - * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then - * subtract the result from 31 to convert the MSB count into an LSB count. - */ - - /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */ - emit(MOV(this->result, temp)); - emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ)); - - temp.negate = true; - inst = emit(ADD(this->result, temp, fs_reg(31))); - inst->predicate = BRW_PREDICATE_NORMAL; - break; - case ir_unop_find_lsb: - emit(FBL(this->result, op[0])); - break; - case ir_unop_saturate: - inst = emit(MOV(this->result, op[0])); - inst->saturate = true; - break; - case ir_triop_bitfield_extract: - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. 
- */ - emit(BFE(this->result, op[2], op[1], op[0])); - break; - case ir_binop_bfm: - emit(BFI1(this->result, op[0], op[1])); - break; - case ir_triop_bfi: - emit(BFI2(this->result, op[0], op[1], op[2])); - break; - case ir_quadop_bitfield_insert: - unreachable("not reached: should be handled by " - "lower_instructions::bitfield_insert_to_bfm_bfi"); - - case ir_unop_bit_not: - emit(NOT(this->result, op[0])); - break; - case ir_binop_bit_and: - emit(AND(this->result, op[0], op[1])); - break; - case ir_binop_bit_xor: - emit(XOR(this->result, op[0], op[1])); - break; - case ir_binop_bit_or: - emit(OR(this->result, op[0], op[1])); - break; - - case ir_binop_lshift: - emit(SHL(this->result, op[0], op[1])); - break; - - case ir_binop_rshift: - if (ir->type->base_type == GLSL_TYPE_INT) - emit(ASR(this->result, op[0], op[1])); - else - emit(SHR(this->result, op[0], op[1])); - break; - case ir_binop_pack_half_2x16_split: - emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]); - break; - case ir_binop_ubo_load: { - /* This IR node takes a constant uniform block and a constant or - * variable byte offset within the block and loads a vector from that. - */ - ir_constant *const_uniform_block = ir->operands[0]->as_constant(); - ir_constant *const_offset = ir->operands[1]->as_constant(); - fs_reg surf_index; - - if (const_uniform_block) { - /* The block index is a constant, so just emit the binding table entry - * as an immediate. - */ - surf_index = fs_reg(stage_prog_data->binding_table.ubo_start + - const_uniform_block->value.u[0]); - } else { - /* The block index is not a constant. Evaluate the index expression - * per-channel and add the base UBO index; we have to select a value - * from any live channel. - */ - surf_index = vgrf(glsl_type::uint_type); - emit(ADD(surf_index, op[0], - fs_reg(stage_prog_data->binding_table.ubo_start))); - emit_uniformize(surf_index, surf_index); - - /* Assume this may touch any UBO. 
It would be nice to provide - * a tighter bound, but the array information is already lowered away. - */ - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ubo_start + - shader_prog->NumUniformBlocks - 1); - } - - if (const_offset) { - fs_reg packed_consts = vgrf(glsl_type::float_type); - packed_consts.type = result.type; - - fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15); - emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8, - packed_consts, surf_index, const_offset_reg)); - - for (int i = 0; i < ir->type->vector_elements; i++) { - packed_consts.set_smear(const_offset->value.u[0] % 16 / 4 + i); - - /* The std140 packing rules don't allow vectors to cross 16-byte - * boundaries, and a reg is 32 bytes. - */ - assert(packed_consts.subreg_offset < 32); - - /* UBO bools are any nonzero value. We consider bools to be - * values with the low bit set to 1. Convert them using CMP. - */ - if (ir->type->base_type == GLSL_TYPE_BOOL) { - emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ)); - } else { - emit(MOV(result, packed_consts)); - } - - result = offset(result, 1); - } - } else { - /* Turn the byte offset into a dword offset. */ - fs_reg base_offset = vgrf(glsl_type::int_type); - emit(SHR(base_offset, op[1], fs_reg(2))); - - for (int i = 0; i < ir->type->vector_elements; i++) { - emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index, - base_offset, i)); - - if (ir->type->base_type == GLSL_TYPE_BOOL) - emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ)); - - result = offset(result, 1); - } - } - - result.reg_offset = 0; - break; - } - - case ir_triop_fma: - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. 
- */ - emit(MAD(this->result, op[2], op[1], op[0])); - break; - - case ir_triop_lrp: - emit_lrp(this->result, op[0], op[1], op[2]); - break; - - case ir_triop_csel: - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - unreachable("already handled above"); - break; - - case ir_unop_d2f: - case ir_unop_f2d: - case ir_unop_d2i: - case ir_unop_i2d: - case ir_unop_d2u: - case ir_unop_u2d: - case ir_unop_d2b: - case ir_unop_pack_double_2x32: - case ir_unop_unpack_double_2x32: - case ir_unop_frexp_sig: - case ir_unop_frexp_exp: - unreachable("fp64 todo"); - break; - } -} - -void -fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, - const glsl_type *type, bool predicated) -{ - switch (type->base_type) { - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_BOOL: - for (unsigned int i = 0; i < type->components(); i++) { - l.type = brw_type_for_base_type(type); - r.type = brw_type_for_base_type(type); - - if (predicated || !l.equals(r)) { - fs_inst *inst = emit(MOV(l, r)); - inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE; - } - - l = offset(l, 1); - r = offset(r, 1); - } - break; - case GLSL_TYPE_ARRAY: - for (unsigned int i = 0; i < type->length; i++) { - emit_assignment_writes(l, r, type->fields.array, predicated); - } - break; - - case GLSL_TYPE_STRUCT: - for (unsigned int i = 0; i < type->length; i++) { - emit_assignment_writes(l, r, type->fields.structure[i].type, - predicated); - } - break; - - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_ATOMIC_UINT: - break; - - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - case GLSL_TYPE_INTERFACE: - unreachable("not reached"); - } -} - -/* If the RHS processing resulted in an instruction generating a - * temporary value, and it would be easy to rewrite the instruction to - * generate its result right into the LHS instead, do so. 
This ends - * up reliably removing instructions where it can be tricky to do so - * later without real UD chain information. - */ -bool -fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, - fs_reg dst, - fs_reg src, - fs_inst *pre_rhs_inst, - fs_inst *last_rhs_inst) -{ - /* Only attempt if we're doing a direct assignment. */ - if (ir->condition || - !(ir->lhs->type->is_scalar() || - (ir->lhs->type->is_vector() && - ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) - return false; - - /* Make sure the last instruction generated our source reg. */ - fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst, - last_rhs_inst, - src); - if (!modify) - return false; - - /* If last_rhs_inst wrote a different number of components than our LHS, - * we can't safely rewrite it. - */ - if (alloc.sizes[dst.reg] != modify->regs_written) - return false; - - /* Success! Rewrite the instruction. */ - modify->dst = dst; - - return true; -} - -void -fs_visitor::visit(ir_assignment *ir) -{ - fs_reg l, r; - fs_inst *inst; - - /* FINISHME: arrays on the lhs */ - ir->lhs->accept(this); - l = this->result; - - fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail(); - - ir->rhs->accept(this); - r = this->result; - - fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail(); - - assert(l.file != BAD_FILE); - assert(r.file != BAD_FILE); - - if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst)) - return; - - if (ir->condition) { - emit_bool_to_cond_code(ir->condition); - } - - if (ir->lhs->type->is_scalar() || - ir->lhs->type->is_vector()) { - for (int i = 0; i < ir->lhs->type->vector_elements; i++) { - if (ir->write_mask & (1 << i)) { - inst = emit(MOV(l, r)); - if (ir->condition) - inst->predicate = BRW_PREDICATE_NORMAL; - r = offset(r, 1); - } - l = offset(l, 1); - } - } else { - emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); - } -} - fs_inst * fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, fs_reg 
coordinate, int coord_components, @@ -2257,156 +966,6 @@ fs_visitor::emit_texture(ir_texture_opcode op, swizzle_result(op, dest_type->vector_elements, dst, sampler); } -void -fs_visitor::visit(ir_texture *ir) -{ - uint32_t sampler = - _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog); - - ir_rvalue *nonconst_sampler_index = - _mesa_get_sampler_array_nonconst_index(ir->sampler); - - /* Handle non-constant sampler array indexing */ - fs_reg sampler_reg; - if (nonconst_sampler_index) { - /* The highest sampler which may be used by this operation is - * the last element of the array. Mark it here, because the generator - * doesn't have enough information to determine the bound. - */ - uint32_t array_size = ir->sampler->as_dereference_array() - ->array->type->array_size(); - - uint32_t max_used = sampler + array_size - 1; - if (ir->op == ir_tg4 && devinfo->gen < 8) { - max_used += stage_prog_data->binding_table.gather_texture_start; - } else { - max_used += stage_prog_data->binding_table.texture_start; - } - - brw_mark_surface_used(prog_data, max_used); - - /* Emit code to evaluate the actual indexing expression */ - nonconst_sampler_index->accept(this); - fs_reg temp = vgrf(glsl_type::uint_type); - emit(ADD(temp, this->result, fs_reg(sampler))); - emit_uniformize(temp, temp); - - sampler_reg = temp; - } else { - /* Single sampler, or constant array index; the indexing expression - * is just an immediate. - */ - sampler_reg = fs_reg(sampler); - } - - /* FINISHME: We're failing to recompile our programs when the sampler is - * updated. This only matters for the texture rectangle scale parameters - * (pre-gen6, or gen6+ with GL_CLAMP). - */ - int texunit = prog->SamplerUnits[sampler]; - - /* Should be lowered by do_lower_texture_projection */ - assert(!ir->projector); - - /* Should be lowered */ - assert(!ir->offset || !ir->offset->type->is_array()); - - /* Generate code to compute all the subexpression trees. 
This has to be - * done before loading any values into MRFs for the sampler message since - * generating these values may involve SEND messages that need the MRFs. - */ - fs_reg coordinate; - int coord_components = 0; - if (ir->coordinate) { - coord_components = ir->coordinate->type->vector_elements; - ir->coordinate->accept(this); - coordinate = this->result; - } - - fs_reg shadow_comparitor; - if (ir->shadow_comparitor) { - ir->shadow_comparitor->accept(this); - shadow_comparitor = this->result; - } - - fs_reg offset_value; - if (ir->offset) { - ir_constant *const_offset = ir->offset->as_constant(); - if (const_offset) { - /* Store the header bitfield in an IMM register. This allows us to - * use offset_value.file to distinguish between no offset, a constant - * offset, and a non-constant offset. - */ - offset_value = - fs_reg(brw_texture_offset(const_offset->value.i, - const_offset->type->vector_elements)); - } else { - ir->offset->accept(this); - offset_value = this->result; - } - } - - fs_reg lod, lod2, sample_index, mcs; - int grad_components = 0; - switch (ir->op) { - case ir_tex: - case ir_lod: - case ir_tg4: - case ir_query_levels: - break; - case ir_txb: - ir->lod_info.bias->accept(this); - lod = this->result; - break; - case ir_txd: - ir->lod_info.grad.dPdx->accept(this); - lod = this->result; - - ir->lod_info.grad.dPdy->accept(this); - lod2 = this->result; - - grad_components = ir->lod_info.grad.dPdx->type->vector_elements; - break; - case ir_txf: - case ir_txl: - case ir_txs: - ir->lod_info.lod->accept(this); - lod = this->result; - break; - case ir_txf_ms: - ir->lod_info.sample_index->accept(this); - sample_index = this->result; - - if (devinfo->gen >= 7 && - key_tex->compressed_multisample_layout_mask & (1 << sampler)) { - mcs = emit_mcs_fetch(coordinate, ir->coordinate->type->vector_elements, - sampler_reg); - } else { - mcs = fs_reg(0u); - } - break; - default: - unreachable("Unrecognized texture opcode"); - }; - - int gather_component = 0; - if 
(ir->op == ir_tg4) - gather_component = ir->lod_info.component->as_constant()->value.i[0]; - - bool is_rect = - ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT; - - bool is_cube_array = - ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && - ir->sampler->type->sampler_array; - - emit_texture(ir->op, ir->type, coordinate, coord_components, - shadow_comparitor, lod, lod2, grad_components, - sample_index, offset_value, mcs, - gather_component, is_cube_array, is_rect, sampler, - sampler_reg, texunit); -} - /** * Apply workarounds for Gen6 gather with UINT/SINT */ @@ -2506,449 +1065,6 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components, } } -void -fs_visitor::visit(ir_swizzle *ir) -{ - ir->val->accept(this); - fs_reg val = this->result; - - if (ir->type->vector_elements == 1) { - this->result = offset(this->result, ir->mask.x); - return; - } - - fs_reg result = vgrf(ir->type); - this->result = result; - - for (unsigned int i = 0; i < ir->type->vector_elements; i++) { - fs_reg channel = val; - int swiz = 0; - - switch (i) { - case 0: - swiz = ir->mask.x; - break; - case 1: - swiz = ir->mask.y; - break; - case 2: - swiz = ir->mask.z; - break; - case 3: - swiz = ir->mask.w; - break; - } - - emit(MOV(result, offset(channel, swiz))); - result = offset(result, 1); - } -} - -void -fs_visitor::visit(ir_discard *ir) -{ - /* We track our discarded pixels in f0.1. By predicating on it, we can - * update just the flag bits that aren't yet discarded. If there's no - * condition, we emit a CMP of g0 != g0, so all currently executing - * channels will get turned off. 
- */ - fs_inst *cmp; - if (ir->condition) { - emit_bool_to_cond_code(ir->condition); - cmp = (fs_inst *) this->instructions.get_tail(); - cmp->conditional_mod = brw_negate_cmod(cmp->conditional_mod); - } else { - fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), - BRW_REGISTER_TYPE_UW)); - cmp = emit(CMP(reg_null_f, some_reg, some_reg, BRW_CONDITIONAL_NZ)); - } - cmp->predicate = BRW_PREDICATE_NORMAL; - cmp->flag_subreg = 1; - - if (devinfo->gen >= 6) { - emit_discard_jump(); - } -} - -void -fs_visitor::visit(ir_constant *ir) -{ - /* Set this->result to reg at the bottom of the function because some code - * paths will cause this visitor to be applied to other fields. This will - * cause the value stored in this->result to be modified. - * - * Make reg constant so that it doesn't get accidentally modified along the - * way. Yes, I actually had this problem. :( - */ - const fs_reg reg = vgrf(ir->type); - fs_reg dst_reg = reg; - - if (ir->type->is_array()) { - const unsigned size = type_size(ir->type->fields.array); - - for (unsigned i = 0; i < ir->type->length; i++) { - ir->array_elements[i]->accept(this); - fs_reg src_reg = this->result; - - dst_reg.type = src_reg.type; - for (unsigned j = 0; j < size; j++) { - emit(MOV(dst_reg, src_reg)); - src_reg = offset(src_reg, 1); - dst_reg = offset(dst_reg, 1); - } - } - } else if (ir->type->is_record()) { - foreach_in_list(ir_constant, field, &ir->components) { - const unsigned size = type_size(field->type); - - field->accept(this); - fs_reg src_reg = this->result; - - dst_reg.type = src_reg.type; - for (unsigned j = 0; j < size; j++) { - emit(MOV(dst_reg, src_reg)); - src_reg = offset(src_reg, 1); - dst_reg = offset(dst_reg, 1); - } - } - } else { - const unsigned size = type_size(ir->type); - - for (unsigned i = 0; i < size; i++) { - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - emit(MOV(dst_reg, fs_reg(ir->value.f[i]))); - break; - case GLSL_TYPE_UINT: - emit(MOV(dst_reg, fs_reg(ir->value.u[i]))); - break; 
- case GLSL_TYPE_INT: - emit(MOV(dst_reg, fs_reg(ir->value.i[i]))); - break; - case GLSL_TYPE_BOOL: - emit(MOV(dst_reg, fs_reg(ir->value.b[i] != 0 ? ~0 : 0))); - break; - default: - unreachable("Non-float/uint/int/bool constant"); - } - dst_reg = offset(dst_reg, 1); - } - } - - this->result = reg; -} - -void -fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) -{ - ir_expression *expr = ir->as_expression(); - - if (!expr || expr->operation == ir_binop_ubo_load) { - ir->accept(this); - - fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - return; - } - - fs_reg op[3]; - - assert(expr->get_num_operands() <= 3); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); - - expr->operands[i]->accept(this); - op[i] = this->result; - - resolve_ud_negate(&op[i]); - } - - emit_bool_to_cond_code_of_reg(expr, op); -} - -void -fs_visitor::emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3]) -{ - fs_inst *inst; - - switch (expr->operation) { - case ir_unop_logic_not: - inst = emit(AND(reg_null_d, op[0], fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; - - case ir_binop_logic_xor: - if (devinfo->gen <= 5) { - fs_reg temp = vgrf(expr->type); - emit(XOR(temp, op[0], op[1])); - inst = emit(AND(reg_null_d, temp, fs_reg(1))); - } else { - inst = emit(XOR(reg_null_d, op[0], op[1])); - } - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_binop_logic_or: - if (devinfo->gen <= 5) { - fs_reg temp = vgrf(expr->type); - emit(OR(temp, op[0], op[1])); - inst = emit(AND(reg_null_d, temp, fs_reg(1))); - } else { - inst = emit(OR(reg_null_d, op[0], op[1])); - } - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_binop_logic_and: - if (devinfo->gen <= 5) { - fs_reg temp = vgrf(expr->type); - emit(AND(temp, op[0], op[1])); - inst = emit(AND(reg_null_d, temp, fs_reg(1))); - } else { - inst = emit(AND(reg_null_d, op[0], op[1])); 
- } - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_unop_f2b: - if (devinfo->gen >= 6) { - emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); - } else { - inst = emit(MOV(reg_null_f, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_unop_i2b: - if (devinfo->gen >= 6) { - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - } else { - inst = emit(MOV(reg_null_d, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - if (devinfo->gen <= 5) { - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); - } - - emit(CMP(reg_null_d, op[0], op[1], - brw_conditional_for_comparison(expr->operation))); - break; - - case ir_triop_csel: { - /* Expand the boolean condition into the flag register. */ - inst = emit(MOV(reg_null_d, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - - /* Select which boolean to return. */ - fs_reg temp = vgrf(expr->operands[1]->type); - inst = emit(SEL(temp, op[1], op[2])); - inst->predicate = BRW_PREDICATE_NORMAL; - - /* Expand the result to a condition code. */ - inst = emit(MOV(reg_null_d, temp)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - } - - default: - unreachable("not reached"); - } -} - -/** - * Emit a gen6 IF statement with the comparison folded into the IF - * instruction. 
- */ -void -fs_visitor::emit_if_gen6(ir_if *ir) -{ - ir_expression *expr = ir->condition->as_expression(); - - if (expr && expr->operation != ir_binop_ubo_load) { - fs_reg op[3]; - fs_inst *inst; - fs_reg temp; - - assert(expr->get_num_operands() <= 3); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); - - expr->operands[i]->accept(this); - op[i] = this->result; - } - - switch (expr->operation) { - case ir_unop_logic_not: - emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_Z)); - return; - - case ir_binop_logic_xor: - emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_logic_or: - temp = vgrf(glsl_type::bool_type); - emit(OR(temp, op[0], op[1])); - emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_logic_and: - temp = vgrf(glsl_type::bool_type); - emit(AND(temp, op[0], op[1])); - emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_unop_f2b: - inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - return; - - case ir_unop_i2b: - emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - if (devinfo->gen <= 5) { - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); - } - - emit(IF(op[0], op[1], - brw_conditional_for_comparison(expr->operation))); - return; - - case ir_triop_csel: { - /* Expand the boolean condition into the flag register. */ - fs_inst *inst = emit(MOV(reg_null_d, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - - /* Select which boolean to use as the result. 
*/ - fs_reg temp = vgrf(expr->operands[1]->type); - inst = emit(SEL(temp, op[1], op[2])); - inst->predicate = BRW_PREDICATE_NORMAL; - - emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - } - - default: - unreachable("not reached"); - } - } - - ir->condition->accept(this); - emit(IF(this->result, fs_reg(0), BRW_CONDITIONAL_NZ)); -} - -bool -fs_visitor::try_opt_frontfacing_ternary(ir_if *ir) -{ - ir_dereference_variable *deref = ir->condition->as_dereference_variable(); - if (!deref || strcmp(deref->var->name, "gl_FrontFacing") != 0) - return false; - - if (ir->then_instructions.length() != 1 || - ir->else_instructions.length() != 1) - return false; - - ir_assignment *then_assign = - ((ir_instruction *)ir->then_instructions.head)->as_assignment(); - ir_assignment *else_assign = - ((ir_instruction *)ir->else_instructions.head)->as_assignment(); - - if (!then_assign || then_assign->condition || - !else_assign || else_assign->condition || - then_assign->write_mask != else_assign->write_mask || - !then_assign->lhs->equals(else_assign->lhs)) - return false; - - ir_constant *then_rhs = then_assign->rhs->as_constant(); - ir_constant *else_rhs = else_assign->rhs->as_constant(); - - if (!then_rhs || !else_rhs) - return false; - - if (then_rhs->type->base_type != GLSL_TYPE_FLOAT) - return false; - - if ((then_rhs->is_one() && else_rhs->is_negative_one()) || - (else_rhs->is_one() && then_rhs->is_negative_one())) { - then_assign->lhs->accept(this); - fs_reg dst = this->result; - dst.type = BRW_REGISTER_TYPE_D; - fs_reg tmp = vgrf(glsl_type::int_type); - - if (devinfo->gen >= 6) { - /* Bit 15 of g0.0 is 0 if the polygon is front facing. */ - fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W)); - - /* For (gl_FrontFacing ? 1.0 : -1.0), emit: - * - * or(8) tmp.1<2>W g0.0<0,1,0>W 0x00003f80W - * and(8) dst<1>D tmp<8,8,1>D 0xbf800000D - * - * and negate g0.0<0,1,0>W for (gl_FrontFacing ? -1.0 : 1.0). 
- */ - - if (then_rhs->is_negative_one()) { - assert(else_rhs->is_one()); - g0.negate = true; - } - - tmp.type = BRW_REGISTER_TYPE_W; - tmp.subreg_offset = 2; - tmp.stride = 2; - - fs_inst *or_inst = emit(OR(tmp, g0, fs_reg(0x3f80))); - or_inst->src[1].type = BRW_REGISTER_TYPE_UW; - - tmp.type = BRW_REGISTER_TYPE_D; - tmp.subreg_offset = 0; - tmp.stride = 1; - } else { - /* Bit 31 of g1.6 is 0 if the polygon is front facing. */ - fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D)); - - /* For (gl_FrontFacing ? 1.0 : -1.0), emit: - * - * or(8) tmp<1>D g1.6<0,1,0>D 0x3f800000D - * and(8) dst<1>D tmp<8,8,1>D 0xbf800000D - * - * and negate g1.6<0,1,0>D for (gl_FrontFacing ? -1.0 : 1.0). - */ - - if (then_rhs->is_negative_one()) { - assert(else_rhs->is_one()); - g1_6.negate = true; - } - - emit(OR(tmp, g1_6, fs_reg(0x3f800000))); - } - emit(AND(dst, tmp, fs_reg(0xbf800000))); - return true; - } - - return false; -} - /** * Try to replace IF/MOV/ELSE/MOV/ENDIF with SEL. * @@ -3038,178 +1154,6 @@ fs_visitor::try_replace_with_sel() return false; } -void -fs_visitor::visit(ir_if *ir) -{ - if (try_opt_frontfacing_ternary(ir)) - return; - - /* Don't point the annotation at the if statement, because then it plus - * the then and else blocks get printed. 
- */ - this->base_ir = ir->condition; - - if (devinfo->gen == 6) { - emit_if_gen6(ir); - } else { - emit_bool_to_cond_code(ir->condition); - - emit(IF(BRW_PREDICATE_NORMAL)); - } - - foreach_in_list(ir_instruction, ir_, &ir->then_instructions) { - this->base_ir = ir_; - ir_->accept(this); - } - - if (!ir->else_instructions.is_empty()) { - emit(BRW_OPCODE_ELSE); - - foreach_in_list(ir_instruction, ir_, &ir->else_instructions) { - this->base_ir = ir_; - ir_->accept(this); - } - } - - emit(BRW_OPCODE_ENDIF); - - if (!try_replace_with_sel() && devinfo->gen < 6) { - no16("Can't support (non-uniform) control flow on SIMD16\n"); - } -} - -void -fs_visitor::visit(ir_loop *ir) -{ - if (devinfo->gen < 6) { - no16("Can't support (non-uniform) control flow on SIMD16\n"); - } - - this->base_ir = NULL; - emit(BRW_OPCODE_DO); - - foreach_in_list(ir_instruction, ir_, &ir->body_instructions) { - this->base_ir = ir_; - ir_->accept(this); - } - - this->base_ir = NULL; - emit(BRW_OPCODE_WHILE); -} - -void -fs_visitor::visit(ir_loop_jump *ir) -{ - switch (ir->mode) { - case ir_loop_jump::jump_break: - emit(BRW_OPCODE_BREAK); - break; - case ir_loop_jump::jump_continue: - emit(BRW_OPCODE_CONTINUE); - break; - } -} - -void -fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir) -{ - ir_dereference *deref = static_cast( - ir->actual_parameters.get_head()); - ir_variable *location = deref->variable_referenced(); - unsigned surf_index = (stage_prog_data->binding_table.abo_start + - location->data.binding); - - /* Calculate the surface offset */ - fs_reg offset = vgrf(glsl_type::uint_type); - ir_dereference_array *deref_array = deref->as_dereference_array(); - - if (deref_array) { - deref_array->array_index->accept(this); - - fs_reg tmp = vgrf(glsl_type::uint_type); - emit(MUL(tmp, this->result, fs_reg(ATOMIC_COUNTER_SIZE))); - emit(ADD(offset, tmp, fs_reg(location->data.atomic.offset))); - } else { - offset = fs_reg(location->data.atomic.offset); - } - - /* Emit the appropriate machine 
instruction */ - const char *callee = ir->callee->function_name(); - ir->return_deref->accept(this); - fs_reg dst = this->result; - - if (!strcmp("__intrinsic_atomic_read", callee)) { - emit_untyped_surface_read(surf_index, dst, offset); - - } else if (!strcmp("__intrinsic_atomic_increment", callee)) { - emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset, - fs_reg(), fs_reg()); - - } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { - emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset, - fs_reg(), fs_reg()); - } -} - -void -fs_visitor::visit(ir_call *ir) -{ - const char *callee = ir->callee->function_name(); - - if (!strcmp("__intrinsic_atomic_read", callee) || - !strcmp("__intrinsic_atomic_increment", callee) || - !strcmp("__intrinsic_atomic_predecrement", callee)) { - visit_atomic_counter_intrinsic(ir); - } else { - unreachable("Unsupported intrinsic."); - } -} - -void -fs_visitor::visit(ir_return *) -{ - unreachable("FINISHME"); -} - -void -fs_visitor::visit(ir_function *ir) -{ - /* Ignore function bodies other than main() -- we shouldn't see calls to - * them since they should all be inlined before we get to ir_to_mesa. - */ - if (strcmp(ir->name, "main") == 0) { - const ir_function_signature *sig; - exec_list empty; - - sig = ir->matching_signature(NULL, &empty, false); - - assert(sig); - - foreach_in_list(ir_instruction, ir_, &sig->body) { - this->base_ir = ir_; - ir_->accept(this); - } - } -} - -void -fs_visitor::visit(ir_function_signature *) -{ - unreachable("not reached"); -} - -void -fs_visitor::visit(ir_emit_vertex *) -{ - unreachable("not reached"); -} - -void -fs_visitor::visit(ir_end_primitive *) -{ - unreachable("not reached"); -} - void fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, fs_reg dst, fs_reg offset, fs_reg src0, @@ -4096,27 +2040,6 @@ fs_visitor::emit_cs_terminate() inst->eot = true; } -/** - * Resolve the result of a Gen4-5 CMP instruction to a proper boolean. 
- * - * CMP on Gen4-5 only sets the LSB of the result; the rest are undefined. - * If we need a proper boolean value, we have to fix it up to be 0 or ~0. - */ -void -fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) -{ - assert(devinfo->gen <= 5); - - if (rvalue->type != glsl_type::bool_type) - return; - - fs_reg and_result = vgrf(glsl_type::bool_type); - fs_reg neg_result = vgrf(glsl_type::bool_type); - emit(AND(and_result, *reg, fs_reg(1))); - emit(MOV(neg_result, negate(and_result))); - *reg = neg_result; -} - fs_visitor::fs_visitor(struct brw_context *brw, void *mem_ctx, gl_shader_stage stage, @@ -4152,9 +2075,6 @@ fs_visitor::fs_visitor(struct brw_context *brw, this->failed = false; this->simd16_unsupported = false; this->no16_msg = NULL; - this->variable_ht = hash_table_ctor(0, - hash_table_pointer_hash, - hash_table_pointer_compare); this->nir_locals = NULL; this->nir_globals = NULL; @@ -4189,5 +2109,4 @@ fs_visitor::fs_visitor(struct brw_context *brw, fs_visitor::~fs_visitor() { - hash_table_dtor(this->variable_ht); } From b95ec49e57f81bdd75795dc93022533704efe509 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 May 2015 12:03:33 -0700 Subject: [PATCH 362/834] i965/vs: Rework the logic for generating NIR from ARB vertex programs Whether or not to use NIR is now equivalent to brw->scalar_vs. We can simplify the logic and make it far less confusing. 
Reviewed-by: Matt Turner Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 02a7e33f791..a324798e060 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1868,8 +1868,6 @@ brw_vs_emit(struct brw_context *brw, bool start_busy = false; double start_time = 0; const unsigned *assembly = NULL; - bool use_nir = - brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions != NULL; if (unlikely(brw->perf_debug)) { start_busy = (brw->batch.last_bo && @@ -1884,17 +1882,18 @@ brw_vs_emit(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_VS)) brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base); - if (use_nir && !c->vp->program.Base.nir) { - /* Normally we generate NIR in LinkShader() or ProgramStringNotify(), but - * Mesa's fixed-function vertex program handling doesn't notify the driver - * at all. Just do it here, at the last minute, even though it's lame. - */ - assert(c->vp->program.Base.Id == 0 && prog == NULL); - c->vp->program.Base.nir = - brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX); - } + if (brw->scalar_vs) { + if (!c->vp->program.Base.nir) { + /* Normally we generate NIR in LinkShader() or + * ProgramStringNotify(), but Mesa's fixed-function vertex program + * handling doesn't notify the driver at all. Just do it here, at + * the last minute, even though it's lame. 
+ */ + assert(c->vp->program.Base.Id == 0 && prog == NULL); + c->vp->program.Base.nir = + brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX); + } - if (brw->scalar_vs && (prog || use_nir)) { fs_visitor v(brw, mem_ctx, MESA_SHADER_VERTEX, &c->key, &prog_data->base.base, prog, &c->vp->program.Base, 8); if (!v.run_vs()) { From 065978d36b8a8ba5aa23248c6bcd0f0e4d6e86de Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 27 May 2015 11:11:06 +1000 Subject: [PATCH 363/834] softpipe: fix offset wrapping calculations (v2) Roland pointed out my previous attempt was lacking, so I enhanced the texwrap piglit test, and tested them. This fixes the offset calculations in a number of areas by adding the offset first, it also fixes the fastpaths, which I forgot to address in the previous commit. v2: try and avoid divides in most paths, the repeat mirror path really was ugly no matter which way I went, so I left it having the divide. Also fix the gather lod calculation bug. Reviewed-by: Roland Scheidegger Signed-off-by: Dave Airlie --- src/gallium/drivers/softpipe/sp_tex_sample.c | 146 +++++++++---------- 1 file changed, 68 insertions(+), 78 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 4ac349807e5..1010b63de2c 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -145,14 +145,14 @@ wrap_nearest_clamp(float s, unsigned size, int offset, int *icoord) { /* s limited to [0,1] */ /* i limited to [0,size-1] */ + s *= size; + s += offset; if (s <= 0.0F) *icoord = 0; - else if (s >= 1.0F) + else if (s >= size) *icoord = size - 1; else - *icoord = util_ifloor(s * size); - if (offset) - *icoord = CLAMP(*icoord + offset, 0, size - 1); + *icoord = util_ifloor(s); } @@ -161,17 +161,18 @@ wrap_nearest_clamp_to_edge(float s, unsigned size, int offset, int *icoord) { /* s limited to [min,max] */ /* i limited to [0, size-1] */ - const float min = 
1.0F / (2.0F * size); - const float max = 1.0F - min; + const float min = 0.5F; + const float max = (float)size - 0.5F; + + s *= size; + s += offset; if (s < min) *icoord = 0; else if (s > max) *icoord = size - 1; else - *icoord = util_ifloor(s * size); - if (offset) - *icoord = CLAMP(*icoord + offset, 0, size - 1); + *icoord = util_ifloor(s); } @@ -180,26 +181,30 @@ wrap_nearest_clamp_to_border(float s, unsigned size, int offset, int *icoord) { /* s limited to [min,max] */ /* i limited to [-1, size] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; + const float min = -0.5F; + const float max = size + 0.5F; + + s *= size; + s += offset; if (s <= min) *icoord = -1; else if (s >= max) *icoord = size; else - *icoord = util_ifloor(s * size); - if (offset) - *icoord = CLAMP(*icoord + offset, 0, size - 1); + *icoord = util_ifloor(s); } - static void wrap_nearest_mirror_repeat(float s, unsigned size, int offset, int *icoord) { const float min = 1.0F / (2.0F * size); const float max = 1.0F - min; - const int flr = util_ifloor(s); - float u = frac(s); + int flr; + float u; + + s += (float)offset / size; + flr = util_ifloor(s); + u = frac(s); if (flr & 1) u = 1.0F - u; if (u < min) @@ -208,8 +213,6 @@ wrap_nearest_mirror_repeat(float s, unsigned size, int offset, int *icoord) *icoord = size - 1; else *icoord = util_ifloor(u * size); - if (offset) - *icoord = CLAMP(*icoord + offset, 0, size - 1); } @@ -218,15 +221,13 @@ wrap_nearest_mirror_clamp(float s, unsigned size, int offset, int *icoord) { /* s limited to [0,1] */ /* i limited to [0,size-1] */ - const float u = fabsf(s); + const float u = fabsf(s * size + offset); if (u <= 0.0F) *icoord = 0; - else if (u >= 1.0F) + else if (u >= size) *icoord = size - 1; else - *icoord = util_ifloor(u * size); - if (offset) - *icoord = CLAMP(*icoord + offset, 0, size - 1); + *icoord = util_ifloor(u); } @@ -235,36 +236,33 @@ wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int offset, int *icoor { /* 
s limited to [min,max] */ /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - const float u = fabsf(s); + const float min = 0.5F; + const float max = (float)size - 0.5F; + const float u = fabsf(s * size + offset); + if (u < min) *icoord = 0; else if (u > max) *icoord = size - 1; else - *icoord = util_ifloor(u * size); - if (offset) - *icoord = CLAMP(*icoord + offset, 0, size - 1); + *icoord = util_ifloor(u); } static void wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int offset, int *icoord) { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - const float u = fabsf(s); + /* u limited to [-0.5, size-0.5] */ + const float min = -0.5F; + const float max = (float)size + 0.5F; + const float u = fabsf(s * size + offset); + if (u < min) *icoord = -1; else if (u > max) *icoord = size; else - *icoord = util_ifloor(u * size); - if (offset) - *icoord = CLAMP(*icoord + offset, 0, size - 1); + *icoord = util_ifloor(u); } @@ -293,14 +291,11 @@ static void wrap_linear_clamp(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { - float u = CLAMP(s, 0.0F, 1.0F); - u = u * size - 0.5f; + float u = CLAMP(s * size + offset, 0.0F, (float)size); + + u = u - 0.5f; *icoord0 = util_ifloor(u); *icoord1 = *icoord0 + 1; - if (offset) { - *icoord0 = CLAMP(*icoord0 + offset, 0, size - 1); - *icoord1 = CLAMP(*icoord1 + offset, 0, size - 1); - } *w = frac(u); } @@ -309,18 +304,14 @@ static void wrap_linear_clamp_to_edge(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { - float u = CLAMP(s, 0.0F, 1.0F); - u = u * size - 0.5f; + float u = CLAMP(s * size + offset, 0.0F, (float)size); + u = u - 0.5f; *icoord0 = util_ifloor(u); *icoord1 = *icoord0 + 1; if (*icoord0 < 0) *icoord0 = 0; if (*icoord1 >= (int) size) *icoord1 = size - 1; - if (offset) { - *icoord0 = CLAMP(*icoord0 + offset, 0, size - 1); 
- *icoord1 = CLAMP(*icoord1 + offset, 0, size - 1); - } *w = frac(u); } @@ -329,10 +320,10 @@ static void wrap_linear_clamp_to_border(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - float u = CLAMP(s, min, max); - u = u * size - 0.5f; + const float min = -0.5F; + const float max = (float)size + 0.5F; + float u = CLAMP(s * size + offset, min, max); + u = u - 0.5f; *icoord0 = util_ifloor(u); *icoord1 = *icoord0 + 1; *w = frac(u); @@ -343,8 +334,12 @@ static void wrap_linear_mirror_repeat(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { - const int flr = util_ifloor(s); - float u = frac(s); + int flr; + float u; + + s += (float)offset / size; + flr = util_ifloor(s); + u = frac(s); if (flr & 1) u = 1.0F - u; u = u * size - 0.5F; @@ -362,11 +357,9 @@ static void wrap_linear_mirror_clamp(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { - float u = fabsf(s); - if (u >= 1.0F) + float u = fabsf(s * size + offset); + if (u >= size) u = (float) size; - else - u *= size; u -= 0.5F; *icoord0 = util_ifloor(u); *icoord1 = *icoord0 + 1; @@ -378,11 +371,9 @@ static void wrap_linear_mirror_clamp_to_edge(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { - float u = fabsf(s); - if (u >= 1.0F) + float u = fabsf(s * size + offset); + if (u >= size) u = (float) size; - else - u *= size; u -= 0.5F; *icoord0 = util_ifloor(u); *icoord1 = *icoord0 + 1; @@ -398,15 +389,13 @@ static void wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset, int *icoord0, int *icoord1, float *w) { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - float u = fabsf(s); + const float min = -0.5F; + const float max = size + 0.5F; + float u = fabsf(s * size + offset); if (u <= min) - u = min * size; + u = min; else if (u >= max) - u = max * size; - else - u *= size; + u = max; u -= 0.5F; 
*icoord0 = util_ifloor(u); *icoord1 = *icoord0 + 1; @@ -1040,8 +1029,8 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview, union tex_tile_address addr; int c; - float u = args->s * xpot - 0.5F; - float v = args->t * ypot - 0.5F; + float u = (args->s * xpot - 0.5F) + args->offset[0]; + float v = (args->t * ypot - 0.5F) + args->offset[1]; int uflr = util_ifloor(u); int vflr = util_ifloor(v); @@ -1093,8 +1082,8 @@ img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview, union tex_tile_address addr; int c; - float u = args->s * xpot; - float v = args->t * ypot; + float u = args->s * xpot + args->offset[0]; + float v = args->t * ypot + args->offset[1]; int uflr = util_ifloor(u); int vflr = util_ifloor(v); @@ -1126,8 +1115,8 @@ img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview, union tex_tile_address addr; int c; - float u = args->s * xpot; - float v = args->t * ypot; + float u = args->s * xpot + args->offset[0]; + float v = args->t * ypot + args->offset[1]; int x0, y0; const float *out; @@ -1889,7 +1878,6 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview, switch (control) { case tgsi_sampler_lod_none: - case tgsi_sampler_gather: /* XXX FIXME */ case tgsi_sampler_derivs_explicit: lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias; @@ -1908,6 +1896,7 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview, } break; case tgsi_sampler_lod_zero: + case tgsi_sampler_gather: /* this is all static state in the sampler really need clamp here? 
*/ lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod); break; @@ -2472,6 +2461,7 @@ mip_filter_linear_2d_linear_repeat_POT( args.t = t[j]; args.p = p[j]; args.face_id = sp_sview->faces[j]; + args.offset = filt_args->offset; args.gather_only = filt_args->control == tgsi_sampler_gather; if ((unsigned)level0 >= psview->u.tex.last_level) { if (level0 < 0) From 7afc992c20a94883b876fe53e155b9fec6e5fb27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 May 2015 15:35:10 +0200 Subject: [PATCH 364/834] radeon/llvm: don't use a static array size for radeon_llvm_context::arrays (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: - don't use realloc (tgsi_shader_info provides the size) Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeon/radeon_llvm.h | 4 +--- .../drivers/radeon/radeon_setup_tgsi_llvm.c | 15 +++++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index 8612ef8daf7..6a9557b0b73 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -33,7 +33,6 @@ #define RADEON_LLVM_MAX_INPUTS 32 * 4 #define RADEON_LLVM_MAX_OUTPUTS 32 * 4 -#define RADEON_LLVM_MAX_ARRAYS 16 #define RADEON_LLVM_INITIAL_CF_DEPTH 4 @@ -130,8 +129,7 @@ struct radeon_llvm_context { unsigned loop_depth; unsigned loop_depth_max; - struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS]; - unsigned num_arrays; + struct tgsi_declaration_range *arrays; LLVMValueRef main_fn; diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 86385375176..49836672dce 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -85,8 +85,9 @@ get_array_range(struct lp_build_tgsi_context *bld_base, unsigned File, const struct 
tgsi_ind_register *reg) { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); + if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 || - reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) { + reg->ArrayID > bld_base->info->array_max[TGSI_FILE_TEMPORARY]) { struct tgsi_declaration_range range; range.First = 0; range.Last = bld_base->info->file_max[File]; @@ -252,8 +253,14 @@ static void emit_declaration( } case TGSI_FILE_TEMPORARY: - if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS) + if (decl->Declaration.Array) { + if (!ctx->arrays) { + int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY]; + ctx->arrays = MALLOC(sizeof(ctx->arrays[0]) * size); + } + ctx->arrays[decl->Array.ArrayID - 1] = decl->Range; + } if (uses_temp_indirect_addressing(bld_base)) { lp_emit_declaration_soa(bld_base, decl); break; @@ -1432,8 +1439,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) /* Allocate outputs */ ctx->soa.outputs = ctx->outputs; - ctx->num_arrays = 0; - /* XXX: Is there a better way to initialize all this ? 
*/ lp_set_default_actions(bld_base); @@ -1622,6 +1627,8 @@ void radeon_llvm_dispose(struct radeon_llvm_context * ctx) { LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module); LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context); + FREE(ctx->arrays); + ctx->arrays = NULL; FREE(ctx->temps); ctx->temps = NULL; FREE(ctx->loop); From 7116250b7a3aa8863f11d18032a3fbd24e2eee73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 27 May 2015 00:15:16 +0200 Subject: [PATCH 365/834] radeon/llvm: reset temps_count on deallocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 49836672dce..c8c980d9d32 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -1631,6 +1631,7 @@ void radeon_llvm_dispose(struct radeon_llvm_context * ctx) ctx->arrays = NULL; FREE(ctx->temps); ctx->temps = NULL; + ctx->temps_count = 0; FREE(ctx->loop); ctx->loop = NULL; ctx->loop_depth_max = 0; From 25e9ae2b79f32631e7255807a242e5fc4e39984c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 May 2015 19:32:36 +0200 Subject: [PATCH 366/834] st/dri: fix postprocessing crash when there's no depth buffer Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89131 Cc: 10.6 10.5 Reviewed-by: Brian Paul --- src/gallium/state_trackers/dri/dri_context.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/gallium/state_trackers/dri/dri_context.c b/src/gallium/state_trackers/dri/dri_context.c index 9f11b15596c..3d8af65ca61 100644 --- a/src/gallium/state_trackers/dri/dri_context.c +++ b/src/gallium/state_trackers/dri/dri_context.c @@ -244,11 +244,10 @@ 
dri_make_current(__DRIcontext * cPriv, ctx->stapi->make_current(ctx->stapi, ctx->st, &draw->base, &read->base); - // This is ok to call here. If they are already init, it's a no-op. - if (draw->textures[ST_ATTACHMENT_BACK_LEFT] && draw->textures[ST_ATTACHMENT_DEPTH_STENCIL] - && ctx->pp) - pp_init_fbos(ctx->pp, draw->textures[ST_ATTACHMENT_BACK_LEFT]->width0, - draw->textures[ST_ATTACHMENT_BACK_LEFT]->height0); + /* This is ok to call here. If they are already init, it's a no-op. */ + if (ctx->pp && draw->textures[ST_ATTACHMENT_BACK_LEFT]) + pp_init_fbos(ctx->pp, draw->textures[ST_ATTACHMENT_BACK_LEFT]->width0, + draw->textures[ST_ATTACHMENT_BACK_LEFT]->height0); return GL_TRUE; } From dd048543e944d95b4471572454cfa902392e3f61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 May 2015 12:47:03 +0200 Subject: [PATCH 367/834] configure.ac: enable building GLES1 and GLES2 by default Reviewed-by: Matt Turner Reviewed-by: Emil Velikov --- configure.ac | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 5594548ddfd..84b404eb8b2 100644 --- a/configure.ac +++ b/configure.ac @@ -714,15 +714,15 @@ AC_ARG_ENABLE([opengl], [enable_opengl="$enableval"], [enable_opengl=yes]) AC_ARG_ENABLE([gles1], - [AS_HELP_STRING([--enable-gles1], - [enable support for OpenGL ES 1.x API @<:@default=disabled@:>@])], + [AS_HELP_STRING([--disable-gles1], + [disable support for OpenGL ES 1.x API @<:@default=enabled@:>@])], [enable_gles1="$enableval"], - [enable_gles1=no]) + [enable_gles1=yes]) AC_ARG_ENABLE([gles2], - [AS_HELP_STRING([--enable-gles2], - [enable support for OpenGL ES 2.x API @<:@default=disabled@:>@])], + [AS_HELP_STRING([--disable-gles2], + [disable support for OpenGL ES 2.x API @<:@default=enabled@:>@])], [enable_gles2="$enableval"], - [enable_gles2=no]) + [enable_gles2=yes]) AC_ARG_ENABLE([dri], [AS_HELP_STRING([--enable-dri], From 29203e77388f11e36db3190834809c3196ee47b5 Mon Sep 17 
00:00:00 2001 From: Jose Fonseca Date: Thu, 28 May 2015 15:35:14 +0100 Subject: [PATCH 368/834] gallivm: Disable frame pointer omission on LLVM 3.7. Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 7b906c27ed5..384ea864081 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -533,6 +533,16 @@ gallivm_compile_module(struct gallivm_state *gallivm) if (0) { debug_printf("optimizing func %s...\n", LLVMGetValueName(func)); } + + /* Disable frame pointer omission on debug/profile builds */ + /* XXX: And workaround http://llvm.org/PR21435 */ +#if HAVE_LLVM >= 0x0307 && \ + (defined(DEBUG) || defined(PROFILE) || \ + defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) + LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim", "true"); + LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim-non-leaf", "true"); +#endif + LLVMRunFunctionPassManager(gallivm->passmgr, func); func = LLVMGetNextFunction(func); } From 0db4ef9df152da1d0f3601bbccc68ac1c94d4a3b Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Thu, 28 May 2015 16:55:10 +0100 Subject: [PATCH 369/834] gallivm: Use the LLVM's C disassembly interface. It doesn't do everything we want. In particular it doesn't allow to detect jumps or return opcodes. Currently we detect the x86's RET opcode. Even though it's worse for LLVM 3.3, it's an improvement for LLVM 3.7, which was totally busted. 
Reviewed-by: Roland Scheidegger --- scons/llvm.py | 4 +- .../auxiliary/gallivm/lp_bld_debug.cpp | 262 +++--------------- 2 files changed, 41 insertions(+), 225 deletions(-) diff --git a/scons/llvm.py b/scons/llvm.py index 17278df88be..c59b8cb9317 100644 --- a/scons/llvm.py +++ b/scons/llvm.py @@ -120,6 +120,7 @@ def generate(env): ]) elif llvm_version >= distutils.version.LooseVersion('3.5'): env.Prepend(LIBS = [ + 'LLVMMCDisassembler', 'LLVMBitWriter', 'LLVMMCJIT', 'LLVMRuntimeDyld', 'LLVMX86Disassembler', 'LLVMX86AsmParser', 'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter', 'LLVMX86Desc', @@ -132,6 +133,7 @@ def generate(env): ]) else: env.Prepend(LIBS = [ + 'LLVMMCDisassembler', 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser', 'LLVMX86CodeGen', 'LLVMX86Desc', 'LLVMSelectionDAG', 'LLVMAsmPrinter', 'LLVMMCParser', 'LLVMX86AsmPrinter', @@ -189,7 +191,7 @@ def generate(env): if '-fno-rtti' in cxxflags: env.Append(CXXFLAGS = ['-fno-rtti']) - components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter'] + components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter', 'mcdisassembler'] env.ParseConfig('llvm-config --libs ' + ' '.join(components)) env.ParseConfig('llvm-config --ldflags') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 76c302f6531..64fb044868f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -28,40 +28,12 @@ #include #include -#include -#include +#include #include #include - -#if HAVE_LLVM >= 0x0306 -#include -#else -#include -#endif - -#include -#include - #include - #include -#include -#include -#include -#include -#include - -#if HAVE_LLVM >= 0x0305 -#define OwningPtr std::unique_ptr -#else -#include -#endif - -#if HAVE_LLVM >= 0x0305 -#include -#endif - #include "util/u_math.h" #include "util/u_debug.h" @@ -133,7 +105,7 @@ lp_get_module_id(LLVMModuleRef module) extern "C" void 
lp_debug_dump_value(LLVMValueRef value) { -#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBDDED) +#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBEDDED) raw_debug_ostream os; llvm::unwrap(value)->print(os); os.flush(); @@ -143,44 +115,16 @@ lp_debug_dump_value(LLVMValueRef value) } -#if HAVE_LLVM < 0x0306 - -/* - * MemoryObject wrapper around a buffer of memory, to be used by MC - * disassembler. - */ -class BufferMemoryObject: - public llvm::MemoryObject +static const char * +disassemblerSymbolLookupCB(void *DisInfo, + uint64_t ReferenceValue, + uint64_t *ReferenceType, + uint64_t ReferencePC, + const char **ReferenceName) { -private: - const uint8_t *Bytes; - uint64_t Length; -public: - BufferMemoryObject(const uint8_t *bytes, uint64_t length) : - Bytes(bytes), Length(length) - { - } - - uint64_t getBase() const - { - return 0; - } - - uint64_t getExtent() const - { - return Length; - } - - int readByte(uint64_t addr, uint8_t *byte) const - { - if (addr > getExtent()) - return -1; - *byte = Bytes[addr]; - return 0; - } -}; - -#endif /* HAVE_LLVM < 0x0306 */ + // TODO: Maybe this can be used to guess jumps + return NULL; +} /* @@ -193,8 +137,6 @@ public: static size_t disassemble(const void* func, llvm::raw_ostream & Out) { - using namespace llvm; - const uint8_t *bytes = (const uint8_t *)func; /* @@ -202,101 +144,23 @@ disassemble(const void* func, llvm::raw_ostream & Out) */ const uint64_t extent = 96 * 1024; - uint64_t max_pc = 0; - /* * Initialize all used objects. 
*/ - std::string Triple = sys::getDefaultTargetTriple(); + std::string Triple = llvm::sys::getProcessTriple(); + LLVMDisasmContextRef D = LLVMCreateDisasm(Triple.c_str(), NULL, 0, NULL, &disassemblerSymbolLookupCB); + char outline[1024]; - std::string Error; - const Target *T = TargetRegistry::lookupTarget(Triple, Error); - -#if HAVE_LLVM >= 0x0304 - OwningPtr AsmInfo(T->createMCAsmInfo(*T->createMCRegInfo(Triple), Triple)); -#else - OwningPtr AsmInfo(T->createMCAsmInfo(Triple)); -#endif - - if (!AsmInfo) { - Out << "error: no assembly info for target " << Triple << "\n"; - Out.flush(); + if (!D) { + Out << "error: couldn't create disassembler for triple " << Triple << "\n"; return 0; } - unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); - - OwningPtr MRI(T->createMCRegInfo(Triple)); - if (!MRI) { - Out << "error: no register info for target " << Triple.c_str() << "\n"; - Out.flush(); - return 0; - } - - OwningPtr MII(T->createMCInstrInfo()); - if (!MII) { - Out << "error: no instruction info for target " << Triple.c_str() << "\n"; - Out.flush(); - return 0; - } - -#if HAVE_LLVM >= 0x0305 - OwningPtr STI(T->createMCSubtargetInfo(Triple, sys::getHostCPUName(), "")); - OwningPtr MCCtx(new MCContext(AsmInfo.get(), MRI.get(), 0)); - OwningPtr DisAsm(T->createMCDisassembler(*STI, *MCCtx)); -#else - OwningPtr STI(T->createMCSubtargetInfo(Triple, sys::getHostCPUName(), "")); - OwningPtr DisAsm(T->createMCDisassembler(*STI)); -#endif - if (!DisAsm) { - Out << "error: no disassembler for target " << Triple << "\n"; - Out.flush(); - return 0; - } - - -#if HAVE_LLVM >= 0x0307 - OwningPtr Printer( - T->createMCInstPrinter(llvm::Triple(Triple), AsmPrinterVariant, *AsmInfo, *MII, *MRI)); -#else - OwningPtr Printer( - T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI)); -#endif - if (!Printer) { - Out << "error: no instruction printer for target " << Triple.c_str() << "\n"; - Out.flush(); - return 0; - } - - TargetOptions options; -#if 
defined(DEBUG) && HAVE_LLVM < 0x0307 - options.JITEmitDebugInfo = true; -#endif -#if defined(PIPE_ARCH_X86) - options.StackAlignmentOverride = 4; -#endif -#if defined(DEBUG) || defined(PROFILE) -#if HAVE_LLVM < 0x0307 - options.NoFramePointerElim = true; -#endif -#endif - OwningPtr TM(T->createTargetMachine(Triple, sys::getHostCPUName(), "", options)); - - /* - * Wrap the data in a MemoryObject - */ -#if HAVE_LLVM >= 0x0306 - ArrayRef memoryObject((const uint8_t *)bytes, extent); -#else - BufferMemoryObject memoryObject((const uint8_t *)bytes, extent); -#endif - uint64_t pc; pc = 0; - while (true) { - MCInst Inst; - uint64_t Size; + while (pc < extent) { + size_t Size; /* * Print address. We use addresses relative to the start of the function, @@ -305,11 +169,13 @@ disassemble(const void* func, llvm::raw_ostream & Out) Out << llvm::format("%6lu:\t", (unsigned long)pc); - if (!DisAsm->getInstruction(Inst, Size, memoryObject, - pc, - nulls(), nulls())) { - Out << "invalid"; + Size = LLVMDisasmInstruction(D, (uint8_t *)bytes + pc, extent - pc, 0, outline, + sizeof outline); + + if (!Size) { + Out << "invalid\n"; pc += 1; + break; } /* @@ -319,7 +185,7 @@ disassemble(const void* func, llvm::raw_ostream & Out) if (0) { unsigned i; for (i = 0; i < Size; ++i) { - Out << llvm::format("%02x ", ((const uint8_t*)bytes)[pc + i]); + Out << llvm::format("%02x ", bytes[pc + i]); } for (; i < 16; ++i) { Out << " "; @@ -329,82 +195,28 @@ disassemble(const void* func, llvm::raw_ostream & Out) /* * Print the instruction. */ -#if HAVE_LLVM >= 0x0307 - Printer->printInst(&Inst, Out, "", *STI); -#else - Printer->printInst(&Inst, Out, ""); -#endif - /* - * Advance. - */ - - pc += Size; - - const MCInstrDesc &TID = MII->get(Inst.getOpcode()); - - /* - * Keep track of forward jumps to a nearby address. 
- */ - - if (TID.isBranch()) { - for (unsigned i = 0; i < Inst.getNumOperands(); ++i) { - const MCOperand &operand = Inst.getOperand(i); - if (operand.isImm()) { - uint64_t jump; - - /* - * FIXME: Handle both relative and absolute addresses correctly. - * EDInstInfo actually has this info, but operandTypes and - * operandFlags enums are not exposed in the public interface. - */ - - if (1) { - /* - * PC relative addr. - */ - - jump = pc + operand.getImm(); - } else { - /* - * Absolute addr. - */ - - jump = (uint64_t)operand.getImm(); - } - - /* - * Output the address relative to the function start, given - * that MC will print the addresses relative the current pc. - */ - Out << "\t\t; " << jump; - - /* - * Ignore far jumps given it could be actually a tail return to - * a random address. - */ - - if (jump > max_pc && - jump < extent) { - max_pc = jump; - } - } - } - } + Out << outline; Out << "\n"; /* * Stop disassembling on return statements, if there is no record of a * jump to a successive address. + * + * XXX: This currently assumes x86 */ - if (TID.isReturn()) { - if (pc > max_pc) { - break; - } + if (Size == 1 && bytes[pc] == 0xc3) { + break; } + /* + * Advance. + */ + + pc += Size; + if (pc >= extent) { Out << "disassembly larger than " << extent << "bytes, aborting\n"; break; @@ -414,6 +226,8 @@ disassemble(const void* func, llvm::raw_ostream & Out) Out << "\n"; Out.flush(); + LLVMDisasmDispose(D); + /* * Print GDB command, useful to verify output. */ From 9119cd7d2c959e437c40c86f214d08dc198bfa69 Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Fri, 29 May 2015 11:58:58 +0100 Subject: [PATCH 370/834] configure.ac: Don't bother checking whether LLVM's MCJIT component is available. Now that we require LLVM 3.3, MCJIT is guaranteed to be available. Trivial. 
--- configure.ac | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 84b404eb8b2..e49473b0f7e 100644 --- a/configure.ac +++ b/configure.ac @@ -1921,10 +1921,7 @@ if test "x$enable_gallium_llvm" = xyes; then AC_MSG_ERROR([LLVM $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is required]) fi - LLVM_COMPONENTS="engine bitwriter" - if $LLVM_CONFIG --components | grep -qw 'mcjit'; then - LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit" - fi + LLVM_COMPONENTS="engine bitwriter mcjit" if test "x$enable_opencl" = xyes; then llvm_check_version_for "3" "5" "0" "opencl" From 0ad15e55bfbca3d6b829b985f9e7ea7e3e69bc61 Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Fri, 29 May 2015 12:13:36 +0100 Subject: [PATCH 371/834] configure.ac: Link mcdisassembler component. gallivm now depends on it. And depending on particular LLVM version / configure options, the build can fail without this change due to undefined reference to `LLVM*Disasm*' symbols. Trivial. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e49473b0f7e..d32aa2492cc 100644 --- a/configure.ac +++ b/configure.ac @@ -1921,7 +1921,7 @@ if test "x$enable_gallium_llvm" = xyes; then AC_MSG_ERROR([LLVM $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is required]) fi - LLVM_COMPONENTS="engine bitwriter mcjit" + LLVM_COMPONENTS="engine bitwriter mcjit mcdisassembler" if test "x$enable_opencl" = xyes; then llvm_check_version_for "3" "5" "0" "opencl" From c0d2b83f0bb15c1a10e53ef85c167febf699921a Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 29 May 2015 18:17:24 +0200 Subject: [PATCH 372/834] gallivm: make sampling more robust when the sampler setup is bogus Pure integer formats cannot be sampled with linear tex / mip filters. In GL such a setup would make the texture incomplete. 
We shouldn't rely on the state tracker though to filter that out, just return all zeros instead of dying in the lerp. Reviewed-by: Jose Fonseca --- .../auxiliary/gallivm/lp_bld_sample_soa.c | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 1a60ca9d3cb..b5c06b69571 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -2501,7 +2501,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, * all zero as mandated by d3d10 in this case. */ unsigned chan; - LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F); + LLVMValueRef zero = lp_build_zero(gallivm, type); for (chan = 0; chan < 4; chan++) { texel_out[chan] = zero; } @@ -2748,11 +2748,37 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, else { LLVMValueRef lod_fpart = NULL, lod_positive = NULL; LLVMValueRef ilevel0 = NULL, ilevel1 = NULL; - boolean use_aos = util_format_fits_8unorm(bld.format_desc) && - op_is_tex && - /* not sure this is strictly needed or simply impossible */ - derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE && - lp_is_simple_wrap_mode(derived_sampler_state.wrap_s); + boolean use_aos; + + if (util_format_is_pure_integer(static_texture_state->format) && + !util_format_has_depth(bld.format_desc) && + (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR || + static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR || + static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) { + /* + * Bail if impossible filtering is specified (the awkard additional + * depth check is because it is legal in gallium to have things like S8Z24 + * here which would say it's pure int despite such formats should sample + * the depth component). 
+ * In GL such filters make the texture incomplete, this makes it robust + * against state trackers which set this up regardless (we'd crash in the + * lerp later (except for gather)). + * Must do this after fetch_texel code since with GL state tracker we'll + * get some junk sampler for buffer textures. + */ + unsigned chan; + LLVMValueRef zero = lp_build_zero(gallivm, type); + for (chan = 0; chan < 4; chan++) { + texel_out[chan] = zero; + } + return; + } + + use_aos = util_format_fits_8unorm(bld.format_desc) && + op_is_tex && + /* not sure this is strictly needed or simply impossible */ + derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE && + lp_is_simple_wrap_mode(derived_sampler_state.wrap_s); use_aos &= bld.num_lods <= num_quads || derived_sampler_state.min_img_filter == From b307921c3ff3b36607752f881a180272366a79cf Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 27 May 2015 17:55:02 -0700 Subject: [PATCH 373/834] i965: Disable compaction for EOT send messages AFAICT, there is no real way to make sure a send message with EOT is properly ignored from compact, nor can I see a way to actually encode EOT while compacting. Before the single send optimization we'd always bail because we hit the is_immediate && !is_compactable_immediate case. However, with single send, is_immediate is not true, and so we end up trying to compact the un-compactible. Without this, any compacting single send instruction will hang because the EOT isn't there. I am not sure how I didn't hit this when I originally enabled the optimization. I didn't check if some surrounding code changed. I know Neil and Matt were both looking into this. I did a quick search and didn't see any patches out there to handle this. Please ignore if this has already been sent by someone. (Direct me to it and I will review it). 
Reported-by: Neil Roberts Reported-by: Mark Janes Tested-by: Mark Janes Signed-off-by: Ben Widawsky Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_eu_compact.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 69cb114b945..67f0b45ac04 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -849,6 +849,12 @@ set_3src_source_index(const struct brw_device_info *devinfo, static bool has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src) { + /* EOT can only be mapped on a send if the src1 is an immediate */ + if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC || + brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) && + brw_inst_eot(devinfo, src)) + return true; + /* Check for instruction bits that don't map to any of the fields of the * compacted instruction. The instruction cannot be compacted if any of * them are set. They overlap with: From 82305f7b003879a3d08e1445f8ac4b1c6bee6330 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 May 2015 11:28:58 -0600 Subject: [PATCH 374/834] dri_util: make version var unsigned to silence warnings _mesa_override_gl_version_contextless() takes an unsigned version parameter. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/common/dri_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 2d847ef7bc8..e7ababe0b67 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -164,7 +164,7 @@ driCreateNewScreen2(int scrn, int fd, struct gl_constants consts = { 0 }; gl_api api; - int version; + unsigned version; api = API_OPENGLES2; if (_mesa_override_gl_version_contextless(&consts, &api, &version)) From fcc79af9e25d5770b8de1f4102901cbf97857a34 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 29 May 2015 21:47:53 +1000 Subject: [PATCH 375/834] mesa: remove unused function declaration Reviewed-by: Matt Turner --- src/mesa/main/uniforms.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h index 55fa2357e38..bd7b05e207a 100644 --- a/src/mesa/main/uniforms.h +++ b/src/mesa/main/uniforms.h @@ -343,10 +343,6 @@ void GLAPIENTRY _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); -long -_mesa_parse_program_resource_name(const GLchar *name, - const GLchar **out_base_name_end); - unsigned _mesa_get_uniform_location(struct gl_shader_program *shProg, const GLchar *name, unsigned *offset); From c821ccf0e3a051e5e867792898ae9b8f08e4601a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 29 May 2015 17:21:15 -0700 Subject: [PATCH 376/834] vc4: Fix return value handling for BO waits. If the wait ever returned -ETIME, we'd abort because the errno was stored in errno and not drmIoctl()'s return value. 
--- src/gallium/drivers/vc4/vc4_bufmgr.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index 6b3a8c3070c..8f9d9c3ff77 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -343,15 +343,17 @@ vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns) ret = 0; } - if (ret == -ETIME) { - return false; - } else if (ret != 0) { - fprintf(stderr, "wait failed\n"); - abort(); - } else { + if (ret == 0) { screen->finished_seqno = wait.seqno; return true; } + + if (errno != ETIME) { + fprintf(stderr, "wait failed: %d\n", ret); + abort(); + } + + return false; } bool @@ -370,14 +372,15 @@ vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns) else ret = 0; - if (ret == -ETIME) { - return false; - } else if (ret != 0) { - fprintf(stderr, "wait failed\n"); - abort(); - } else { + if (ret == 0) return true; + + if (errno != ETIME) { + fprintf(stderr, "wait failed: %d\n", ret); + abort(); } + + return false; } void * From 21a22a61c02a1d1807ff03df8eb8fa16ebdd1b74 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 29 May 2015 18:06:32 -0700 Subject: [PATCH 377/834] vc4: Make sure we allocate idle BOs from the cache. We were returning the most recently freed BO, without checking if it was idle yet. This meant that we generally stalled immediately on the previous frame when generating a new one. Instead, allocate new BOs when the *oldest* BO is still busy, so that the cache scales with how much is needed to keep some frames outstanding, as originally intended. Note that if you don't have some throttling happening, this means that you can accidentally run the system out of memory. The kernel is now applying some throttling on all execs, to hopefully avoid this. 
--- src/gallium/drivers/vc4/vc4_bufmgr.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index 8f9d9c3ff77..8d976707791 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -49,8 +49,18 @@ vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name) struct vc4_bo *bo = NULL; pipe_mutex_lock(cache->lock); if (!is_empty_list(&cache->size_list[page_index])) { - struct simple_node *node = last_elem(&cache->size_list[page_index]); + struct simple_node *node = first_elem(&cache->size_list[page_index]); bo = container_of(node, struct vc4_bo, size_list); + + /* Check that the BO has gone idle. If not, then we want to + * allocate something new instead, since we assume that the + * user will proceed to CPU map it and fill it with stuff. + */ + if (!vc4_bo_wait(bo, 0)) { + pipe_mutex_unlock(cache->lock); + return NULL; + } + pipe_reference_init(&bo->reference, 1); remove_from_list(&bo->time_list); remove_from_list(&bo->size_list); From 78c773bb3646295e4a4f1fe7d6d10f05758ee48b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 29 May 2015 18:19:42 -0700 Subject: [PATCH 378/834] vc4: Convert from simple_list.h to list.h list.h is a nicer and more familiar set of list functions/macros. 
--- src/gallium/drivers/vc4/vc4_bufmgr.c | 53 +++++++-------- src/gallium/drivers/vc4/vc4_bufmgr.h | 4 +- src/gallium/drivers/vc4/vc4_opt_algebraic.c | 5 +- .../drivers/vc4/vc4_opt_constant_folding.c | 4 +- .../drivers/vc4/vc4_opt_copy_propagation.c | 5 +- src/gallium/drivers/vc4/vc4_opt_cse.c | 5 +- src/gallium/drivers/vc4/vc4_opt_dead_code.c | 2 +- .../drivers/vc4/vc4_opt_small_immediates.c | 5 +- src/gallium/drivers/vc4/vc4_opt_vpm_writes.c | 8 +-- src/gallium/drivers/vc4/vc4_program.c | 4 +- src/gallium/drivers/vc4/vc4_qir.c | 18 ++---- src/gallium/drivers/vc4/vc4_qir.h | 10 +-- .../drivers/vc4/vc4_qir_lower_uniforms.c | 9 +-- src/gallium/drivers/vc4/vc4_qpu_emit.c | 11 ++-- src/gallium/drivers/vc4/vc4_qpu_schedule.c | 64 ++++++++----------- .../drivers/vc4/vc4_register_allocate.c | 9 +-- .../drivers/vc4/vc4_reorder_uniforms.c | 4 +- src/gallium/drivers/vc4/vc4_screen.c | 2 +- src/gallium/drivers/vc4/vc4_screen.h | 4 +- 19 files changed, 87 insertions(+), 139 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index 8d976707791..69a7584e14b 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -34,9 +34,6 @@ #include "vc4_context.h" #include "vc4_screen.h" -#define container_of(ptr, type, field) \ - (type*)((char*)ptr - offsetof(type, field)) - static struct vc4_bo * vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name) { @@ -48,9 +45,10 @@ vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name) struct vc4_bo *bo = NULL; pipe_mutex_lock(cache->lock); - if (!is_empty_list(&cache->size_list[page_index])) { - struct simple_node *node = first_elem(&cache->size_list[page_index]); - bo = container_of(node, struct vc4_bo, size_list); + if (!list_empty(&cache->size_list[page_index])) { + struct vc4_bo *bo = LIST_ENTRY(struct vc4_bo, + cache->size_list[page_index].next, + size_list); /* Check that the BO has gone idle. 
If not, then we want to * allocate something new instead, since we assume that the @@ -62,8 +60,8 @@ vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name) } pipe_reference_init(&bo->reference, 1); - remove_from_list(&bo->time_list); - remove_from_list(&bo->size_list); + list_del(&bo->time_list); + list_del(&bo->size_list); bo->name = name; } @@ -161,15 +159,14 @@ vc4_bo_free(struct vc4_bo *bo) static void free_stale_bos(struct vc4_screen *screen, time_t time) { - while (!is_empty_list(&screen->bo_cache.time_list)) { - struct simple_node *node = - first_elem(&screen->bo_cache.time_list); - struct vc4_bo *bo = container_of(node, struct vc4_bo, time_list); + struct vc4_bo_cache *cache = &screen->bo_cache; + list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list, + time_list) { /* If it's more than a second old, free it. */ if (time - bo->free_time > 2) { - remove_from_list(&bo->time_list); - remove_from_list(&bo->size_list); + list_del(&bo->time_list); + list_del(&bo->size_list); vc4_bo_free(bo); } else { break; @@ -190,16 +187,16 @@ vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time) } if (cache->size_list_size <= page_index) { - struct simple_node *new_list = - ralloc_array(screen, struct simple_node, page_index + 1); + struct list_head *new_list = + ralloc_array(screen, struct list_head, page_index + 1); /* Move old list contents over (since the array has moved, and - * therefore the pointers to the list heads have to change. + * therefore the pointers to the list heads have to change). 
*/ for (int i = 0; i < cache->size_list_size; i++) { - struct simple_node *old_head = &cache->size_list[i]; - if (is_empty_list(old_head)) - make_empty_list(&new_list[i]); + struct list_head *old_head = &cache->size_list[i]; + if (list_empty(old_head)) + list_inithead(&new_list[i]); else { new_list[i].next = old_head->next; new_list[i].prev = old_head->prev; @@ -208,15 +205,15 @@ vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time) } } for (int i = cache->size_list_size; i < page_index + 1; i++) - make_empty_list(&new_list[i]); + list_inithead(&new_list[i]); cache->size_list = new_list; cache->size_list_size = page_index + 1; } bo->free_time = time; - insert_at_tail(&cache->size_list[page_index], &bo->size_list); - insert_at_tail(&cache->time_list, &bo->time_list); + list_addtail(&bo->size_list, &cache->size_list[page_index]); + list_addtail(&bo->time_list, &cache->time_list); free_stale_bos(screen, time); } @@ -451,12 +448,10 @@ vc4_bufmgr_destroy(struct pipe_screen *pscreen) struct vc4_screen *screen = vc4_screen(pscreen); struct vc4_bo_cache *cache = &screen->bo_cache; - while (!is_empty_list(&cache->time_list)) { - struct simple_node *node = first_elem(&cache->time_list); - struct vc4_bo *bo = container_of(node, struct vc4_bo, time_list); - - remove_from_list(&bo->time_list); - remove_from_list(&bo->size_list); + list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list, + time_list) { + list_del(&bo->time_list); + list_del(&bo->size_list); vc4_bo_free(bo); } } diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h index f9559e999a1..7320695ca8e 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.h +++ b/src/gallium/drivers/vc4/vc4_bufmgr.h @@ -44,9 +44,9 @@ struct vc4_bo { #endif /** Entry in the linked list of buffers freed, by age. */ - struct simple_node time_list; + struct list_head time_list; /** Entry in the per-page-count linked list of buffers freed (by age). 
*/ - struct simple_node size_list; + struct list_head size_list; /** Approximate second when the bo was freed. */ time_t free_time; /** diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index e40e0f3b71b..7978ea1829f 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -136,11 +136,8 @@ bool qir_opt_algebraic(struct vc4_compile *c) { bool progress = false; - struct simple_node *node; - - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; + list_for_each_entry(struct qinst, inst, &c->instructions, link) { switch (inst->op) { case QOP_SEL_X_Y_ZS: case QOP_SEL_X_Y_ZC: diff --git a/src/gallium/drivers/vc4/vc4_opt_constant_folding.c b/src/gallium/drivers/vc4/vc4_opt_constant_folding.c index ac9be5c9642..15ec9f07260 100644 --- a/src/gallium/drivers/vc4/vc4_opt_constant_folding.c +++ b/src/gallium/drivers/vc4/vc4_opt_constant_folding.c @@ -98,10 +98,8 @@ bool qir_opt_constant_folding(struct vc4_compile *c) { bool progress = false; - struct simple_node *node; - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; + list_for_each_entry(struct qinst, inst, &c->instructions, link) { if (constant_fold(c, inst)) progress = true; } diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c index 5189a401248..d6d2fbf257f 100644 --- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c +++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c @@ -38,13 +38,10 @@ bool qir_opt_copy_propagation(struct vc4_compile *c) { bool progress = false; - struct simple_node *node; bool debug = false; struct qreg *movs = calloc(c->num_temps, sizeof(struct qreg)); - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; - + list_for_each_entry(struct qinst, inst, &c->instructions, link) { for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { int index = 
inst->src[i].index; if (inst->src[i].file == QFILE_TEMP && diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c index 71794f7d1cf..27d0fae739c 100644 --- a/src/gallium/drivers/vc4/vc4_opt_cse.c +++ b/src/gallium/drivers/vc4/vc4_opt_cse.c @@ -121,7 +121,6 @@ bool qir_opt_cse(struct vc4_compile *c) { bool progress = false; - struct simple_node *node, *t; uint32_t sf_count = 0, r4_count = 0; struct hash_table *ht = _mesa_hash_table_create(NULL, NULL, @@ -129,9 +128,7 @@ qir_opt_cse(struct vc4_compile *c) if (!ht) return false; - foreach_s(node, t, &c->instructions) { - struct qinst *inst = (struct qinst *)node; - + list_for_each_entry_safe(struct qinst, inst, &c->instructions, link) { if (qir_has_side_effects(c, inst) || qir_has_side_effect_reads(c, inst)) { continue; diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c b/src/gallium/drivers/vc4/vc4_opt_dead_code.c index e4ead46c9c2..ffd42422de8 100644 --- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c +++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c @@ -86,7 +86,7 @@ qir_opt_dead_code(struct vc4_compile *c) /* Whether we're eliminating texture setup currently. 
*/ bool dce_tex = false; - struct simple_node *node, *t; + struct list_head *node, *t; for (node = c->instructions.prev, t = node->prev; &c->instructions != node; node = t, t = t->prev) { diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c index a329ac69d11..d6e98f0aebf 100644 --- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c +++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c @@ -37,11 +37,8 @@ bool qir_opt_small_immediates(struct vc4_compile *c) { bool progress = false; - struct simple_node *node; - - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; + list_for_each_entry(struct qinst, inst, &c->instructions, link) { /* The small immediate value sits in the raddr B field, so we * can't have 2 small immediates in one instruction (unless * they're the same value, but that should be optimized away diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c index e9711f222cd..e04f02859d5 100644 --- a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c +++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c @@ -37,15 +37,12 @@ qir_opt_vpm_writes(struct vc4_compile *c) return false; bool progress = false; - struct simple_node *node; struct qinst *vpm_writes[64] = { 0 }; uint32_t use_count[c->num_temps]; uint32_t vpm_write_count = 0; memset(&use_count, 0, sizeof(use_count)); - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; - + list_for_each_entry(struct qinst, inst, &c->instructions, link) { switch (inst->dst.file) { case QFILE_VPM: vpm_writes[vpm_write_count++] = inst; @@ -102,7 +99,8 @@ qir_opt_vpm_writes(struct vc4_compile *c) * to maintain the order of the VPM writes. 
*/ assert(!vpm_writes[i]->sf); - move_to_tail(&vpm_writes[i]->link, &inst->link); + list_del(&inst->link); + list_addtail(&inst->link, &vpm_writes[i]->link); qir_remove_instruction(c, vpm_writes[i]); c->defs[inst->dst.index] = NULL; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index d84e5f25616..91540cfe2fa 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2209,11 +2209,9 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, shader->program_id = vc4->next_compiled_program_id++; if (stage == QSTAGE_FRAG) { bool input_live[c->num_input_semantics]; - struct simple_node *node; memset(input_live, 0, sizeof(input_live)); - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; + list_for_each_entry(struct qinst, inst, &c->instructions, link) { for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { if (inst->src[i].file == QFILE_VARY) input_live[inst->src[i].index] = true; diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index e2e6a5cdf16..1c96ef4795f 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -22,7 +22,6 @@ */ #include "util/u_memory.h" -#include "util/simple_list.h" #include "util/ralloc.h" #include "vc4_qir.h" @@ -301,10 +300,7 @@ qir_dump_inst(struct vc4_compile *c, struct qinst *inst) void qir_dump(struct vc4_compile *c) { - struct simple_node *node; - - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; + list_for_each_entry(struct qinst, inst, &c->instructions, link) { qir_dump_inst(c, inst); fprintf(stderr, "\n"); } @@ -370,7 +366,7 @@ qir_emit(struct vc4_compile *c, struct qinst *inst) if (inst->dst.file == QFILE_TEMP) c->defs[inst->dst.index] = inst; - insert_at_tail(&c->instructions, &inst->link); + list_addtail(&inst->link, &c->instructions); } bool @@ -384,7 +380,7 @@ qir_compile_init(void) { struct vc4_compile *c = 
rzalloc(NULL, struct vc4_compile); - make_empty_list(&c->instructions); + list_inithead(&c->instructions); c->output_position_index = -1; c->output_clipvertex_index = -1; @@ -403,7 +399,7 @@ qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst) if (qinst->dst.file == QFILE_TEMP) c->defs[qinst->dst.index] = NULL; - remove_from_list(&qinst->link); + list_del(&qinst->link); free(qinst->src); free(qinst); } @@ -420,9 +416,9 @@ qir_follow_movs(struct vc4_compile *c, struct qreg reg) void qir_compile_destroy(struct vc4_compile *c) { - while (!is_empty_list(&c->instructions)) { + while (!list_empty(&c->instructions)) { struct qinst *qinst = - (struct qinst *)first_elem(&c->instructions); + (struct qinst *)c->instructions.next; qir_remove_instruction(c, qinst); } @@ -478,7 +474,7 @@ void qir_SF(struct vc4_compile *c, struct qreg src) { struct qinst *last_inst = NULL; - if (!is_empty_list(&c->instructions)) + if (!list_empty(&c->instructions)) last_inst = (struct qinst *)c->instructions.prev; if (!last_inst || diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index adc2c89d2c1..732cfd0b306 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -33,7 +33,7 @@ #include "util/macros.h" #include "glsl/nir/nir.h" -#include "util/simple_list.h" +#include "util/list.h" #include "util/u_math.h" enum qfile { @@ -162,12 +162,12 @@ enum qop { }; struct queued_qpu_inst { - struct simple_node link; + struct list_head link; uint64_t inst; }; struct qinst { - struct simple_node link; + struct list_head link; enum qop op; struct qreg dst; @@ -356,10 +356,10 @@ struct vc4_compile { struct qreg undef; enum qstage stage; uint32_t num_temps; - struct simple_node instructions; + struct list_head instructions; uint32_t immediates[1024]; - struct simple_node qpu_inst_list; + struct list_head qpu_inst_list; uint64_t *qpu_insts; uint32_t qpu_inst_count; uint32_t qpu_inst_size; diff --git 
a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c index 63f5eb22858..910c89dca79 100644 --- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c @@ -88,7 +88,6 @@ is_lowerable_uniform(struct qinst *inst, int i) void qir_lower_uniforms(struct vc4_compile *c) { - struct simple_node *node; struct hash_table *ht = _mesa_hash_table_create(c, index_hash, index_compare); @@ -96,8 +95,7 @@ qir_lower_uniforms(struct vc4_compile *c) * than one uniform referenced, and add those uniform values to the * ht. */ - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; + list_for_each_entry(struct qinst, inst, &c->instructions, link) { uint32_t nsrc = qir_get_op_nsrc(inst->op); uint32_t count = 0; @@ -137,10 +135,9 @@ qir_lower_uniforms(struct vc4_compile *c) struct qreg temp = qir_get_temp(c); struct qreg unif = { QFILE_UNIF, max_index }; struct qinst *mov = qir_inst(QOP_MOV, temp, unif, c->undef); - insert_at_head(&c->instructions, &mov->link); + list_add(&mov->link, &c->instructions); c->defs[temp.index] = mov; - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; + list_for_each_entry(struct qinst, inst, &c->instructions, link) { uint32_t nsrc = qir_get_op_nsrc(inst->op); uint32_t count = 0; diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index eeb8d3a21ff..577eb9200f4 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -47,14 +47,14 @@ queue(struct vc4_compile *c, uint64_t inst) { struct queued_qpu_inst *q = rzalloc(c, struct queued_qpu_inst); q->inst = inst; - insert_at_tail(&c->qpu_inst_list, &q->link); + list_addtail(&q->link, &c->qpu_inst_list); } static uint64_t * last_inst(struct vc4_compile *c) { struct queued_qpu_inst *q = - (struct queued_qpu_inst *)last_elem(&c->qpu_inst_list); + (struct queued_qpu_inst 
*)c->qpu_inst_list.prev; return &q->inst; } @@ -144,7 +144,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) QPU_UNPACK_16B_TO_F32, }; - make_empty_list(&c->qpu_inst_list); + list_inithead(&c->qpu_inst_list); switch (c->stage) { case QSTAGE_VERT: @@ -170,10 +170,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; } - struct simple_node *node; - foreach(node, &c->instructions) { - struct qinst *qinst = (struct qinst *)node; - + list_for_each_entry(struct qinst, qinst, &c->instructions, link) { #if 0 fprintf(stderr, "translating qinst to qpu: "); qir_dump_inst(qinst); diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index f523b4c6fb0..19cbf7bb98c 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -43,7 +43,7 @@ static bool debug; struct schedule_node_child; struct schedule_node { - struct simple_node link; + struct list_head link; struct queued_qpu_inst *inst; struct schedule_node_child *children; uint32_t child_count; @@ -400,22 +400,21 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) } static void -calculate_forward_deps(struct vc4_compile *c, struct simple_node *schedule_list) +calculate_forward_deps(struct vc4_compile *c, struct list_head *schedule_list) { - struct simple_node *node; struct schedule_state state; memset(&state, 0, sizeof(state)); state.dir = F; - foreach(node, schedule_list) - calculate_deps(&state, (struct schedule_node *)node); + list_for_each_entry(struct schedule_node, node, schedule_list, link) + calculate_deps(&state, node); } static void -calculate_reverse_deps(struct vc4_compile *c, struct simple_node *schedule_list) +calculate_reverse_deps(struct vc4_compile *c, struct list_head *schedule_list) { - struct simple_node *node; + struct list_head *node; struct schedule_state state; memset(&state, 0, sizeof(state)); @@ -507,15 +506,13 @@ 
get_instruction_priority(uint64_t inst) static struct schedule_node * choose_instruction_to_schedule(struct choose_scoreboard *scoreboard, - struct simple_node *schedule_list, + struct list_head *schedule_list, struct schedule_node *prev_inst) { struct schedule_node *chosen = NULL; - struct simple_node *node; int chosen_prio = 0; - foreach(node, schedule_list) { - struct schedule_node *n = (struct schedule_node *)node; + list_for_each_entry(struct schedule_node, n, schedule_list, link) { uint64_t inst = n->inst->inst; /* "An instruction must not read from a location in physical @@ -596,14 +593,11 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard, } static void -dump_state(struct simple_node *schedule_list) +dump_state(struct list_head *schedule_list) { - struct simple_node *node; - uint32_t i = 0; - foreach(node, schedule_list) { - struct schedule_node *n = (struct schedule_node *)node; + list_for_each_entry(struct schedule_node, n, schedule_list, link) { fprintf(stderr, "%3d: ", i++); vc4_qpu_disasm(&n->inst->inst, 1); fprintf(stderr, "\n"); @@ -639,7 +633,7 @@ compute_delay(struct schedule_node *n) } static void -mark_instruction_scheduled(struct simple_node *schedule_list, +mark_instruction_scheduled(struct list_head *schedule_list, struct schedule_node *node, bool war_only) { @@ -658,16 +652,15 @@ mark_instruction_scheduled(struct simple_node *schedule_list, child->parent_count--; if (child->parent_count == 0) - insert_at_head(schedule_list, &child->link); + list_add(&child->link, schedule_list); node->children[i].node = NULL; } } static void -schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list) +schedule_instructions(struct vc4_compile *c, struct list_head *schedule_list) { - struct simple_node *node, *t; struct choose_scoreboard scoreboard; /* We reorder the uniforms as we schedule instructions, so save the @@ -693,14 +686,12 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list) } /* 
Remove non-DAG heads from the list. */ - foreach_s(node, t, schedule_list) { - struct schedule_node *n = (struct schedule_node *)node; - + list_for_each_entry_safe(struct schedule_node, n, schedule_list, link) { if (n->parent_count != 0) - remove_from_list(&n->link); + list_del(&n->link); } - while (!is_empty_list(schedule_list)) { + while (!list_empty(schedule_list)) { struct schedule_node *chosen = choose_instruction_to_schedule(&scoreboard, schedule_list, @@ -724,7 +715,7 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list) * find an instruction to pair with it. */ if (chosen) { - remove_from_list(&chosen->link); + list_del(&chosen->link); mark_instruction_scheduled(schedule_list, chosen, true); if (chosen->uniform != -1) { c->uniform_data[next_uniform] = @@ -738,7 +729,7 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list) schedule_list, chosen); if (merge) { - remove_from_list(&merge->link); + list_del(&merge->link); inst = qpu_merge_inst(inst, merge->inst->inst); assert(inst != 0); if (merge->uniform != -1) { @@ -813,16 +804,14 @@ void qpu_schedule_instructions(struct vc4_compile *c) { void *mem_ctx = ralloc_context(NULL); - struct simple_node schedule_list; - struct simple_node *node; + struct list_head schedule_list; - make_empty_list(&schedule_list); + list_inithead(&schedule_list); if (debug) { fprintf(stderr, "Pre-schedule instructions\n"); - foreach(node, &c->qpu_inst_list) { - struct queued_qpu_inst *q = - (struct queued_qpu_inst *)node; + list_for_each_entry(struct queued_qpu_inst, q, + &c->qpu_inst_list, link) { vc4_qpu_disasm(&q->inst, 1); fprintf(stderr, "\n"); } @@ -831,7 +820,7 @@ qpu_schedule_instructions(struct vc4_compile *c) /* Wrap each instruction in a scheduler structure. 
*/ uint32_t next_uniform = 0; - while (!is_empty_list(&c->qpu_inst_list)) { + while (!list_empty(&c->qpu_inst_list)) { struct queued_qpu_inst *inst = (struct queued_qpu_inst *)c->qpu_inst_list.next; struct schedule_node *n = rzalloc(mem_ctx, struct schedule_node); @@ -844,16 +833,15 @@ qpu_schedule_instructions(struct vc4_compile *c) } else { n->uniform = -1; } - remove_from_list(&inst->link); - insert_at_tail(&schedule_list, &n->link); + list_del(&inst->link); + list_addtail(&n->link, &schedule_list); } assert(next_uniform == c->num_uniforms); calculate_forward_deps(c, &schedule_list); calculate_reverse_deps(c, &schedule_list); - foreach(node, &schedule_list) { - struct schedule_node *n = (struct schedule_node *)node; + list_for_each_entry(struct schedule_node, n, &schedule_list, link) { compute_delay(n); } diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index f40547b8154..3b0b890b66a 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -161,7 +161,6 @@ node_to_temp_priority(const void *in_a, const void *in_b) struct qpu_reg * vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) { - struct simple_node *node; struct node_to_temp_map map[c->num_temps]; uint32_t temp_to_node[c->num_temps]; uint32_t def[c->num_temps]; @@ -189,9 +188,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) /* Compute the live ranges so we can figure out interference. 
*/ uint32_t ip = 0; - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; - + list_for_each_entry(struct qinst, inst, &c->instructions, link) { if (inst->dst.file == QFILE_TEMP) { def[inst->dst.index] = ip; use[inst->dst.index] = ip; @@ -227,9 +224,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) } /* Figure out our register classes and preallocated registers*/ - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; - + list_for_each_entry(struct qinst, inst, &c->instructions, link) { switch (inst->op) { case QOP_FRAG_Z: ra_set_node_reg(g, temp_to_node[inst->dst.index], diff --git a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c index 109724369d5..7f11fba2340 100644 --- a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c @@ -42,10 +42,8 @@ qir_reorder_uniforms(struct vc4_compile *c) uint32_t *uniform_index = NULL; uint32_t uniform_index_size = 0; uint32_t next_uniform = 0; - struct simple_node *node; - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; + list_for_each_entry(struct qinst, inst, &c->instructions, link) { for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { if (inst->src[i].file != QFILE_UNIF) continue; diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 134d644cb48..60d917d7520 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -459,7 +459,7 @@ vc4_screen_create(int fd) pscreen->is_format_supported = vc4_screen_is_format_supported; screen->fd = fd; - make_empty_list(&screen->bo_cache.time_list); + list_inithead(&screen->bo_cache.time_list); vc4_fence_init(screen); diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h index 60626285d4d..46c4687a3b9 100644 --- a/src/gallium/drivers/vc4/vc4_screen.h +++ 
b/src/gallium/drivers/vc4/vc4_screen.h @@ -61,9 +61,9 @@ struct vc4_screen { struct vc4_bo_cache { /** List of struct vc4_bo freed, by age. */ - struct simple_node time_list; + struct list_head time_list; /** List of struct vc4_bo freed, per size, by age. */ - struct simple_node *size_list; + struct list_head *size_list; uint32_t size_list_size; pipe_mutex lock; From ec1c72d38ea4c709a39c6be9e0ff96bc2a90940f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 29 May 2015 21:27:53 -0700 Subject: [PATCH 379/834] vc4: Don't bother with safe list traversal in CSE. We don't remove or move instructions. --- src/gallium/drivers/vc4/vc4_opt_cse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c index 27d0fae739c..92c8260eb59 100644 --- a/src/gallium/drivers/vc4/vc4_opt_cse.c +++ b/src/gallium/drivers/vc4/vc4_opt_cse.c @@ -128,7 +128,7 @@ qir_opt_cse(struct vc4_compile *c) if (!ht) return false; - list_for_each_entry_safe(struct qinst, inst, &c->instructions, link) { + list_for_each_entry(struct qinst, inst, &c->instructions, link) { if (qir_has_side_effects(c, inst) || qir_has_side_effect_reads(c, inst)) { continue; From 6c846dc57b1d6f3e015a604dba1976f96c4be9e9 Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Thu, 28 May 2015 15:27:31 +0100 Subject: [PATCH 380/834] i965: Don't use a temporary when generating an indirect sample Previously when generating the send instruction for a sample instruction with an indirect sampler it would use the destination register as a temporary store. This breaks when used in combination with the opt_sampler_eot optimisation because that forces the destination to be null. This patch fixes that by avoiding the temp register altogether. The reason the temporary register was needed was because it was trying to ensure the binding table index doesn't overflow a byte by and'ing it with 0xff. The result is then or'd with sampler_index<<8.
This patch instead just and's the whole thing by 0xfff. This will ensure that a bogus sampler index won't overflow into the rest of the message descriptor but unlike the previous code it won't ensure that the binding table index doesn't overflow into the sampler index. It doesn't seem like that should matter very much though because if the shader is generating a bogus sampler index then it's going to just get garbage out either way. Instead of doing sampler_index<<8|(sampler_index+base_table_index) the new code avoids one operation by doing sampler_index*0x101+base_table_index which should be equivalent. However if we wanted to avoid the multiply for some reason we could do this by adding an extra or instruction still without needing the temporary register. This fixes a number of Piglit tests on Skylake that were using indirect samplers such as: spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple Reviewed-by: Matt Turner Reviewed-by: Chris Forbes Acked-by: Ben Widawsky Tested-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 17 ++++------------- .../drivers/dri/i965/brw_vec4_generator.cpp | 17 ++++------------- 2 files changed, 8 insertions(+), 26 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 0be0f866558..ea46b1a1917 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src brw_mark_surface_used(prog_data, sampler + base_binding_table_index); } else { /* Non-const sampler index */ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); - struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); 
brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* Some care required: `sampler` and `temp` may alias: - * addr = sampler & 0xff - * temp = (sampler << 8) & 0xf00 - * addr = addr | temp - */ - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); - brw_OR(p, addr, addr, temp); + /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ + brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index ef77b8df051..9699607ed28 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -398,27 +398,18 @@ vec4_generator::generate_tex(vec4_instruction *inst, brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index); } else { /* Non-constant sampler index. 
*/ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); - struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* Some care required: `sampler` and `temp` may alias: - * addr = sampler & 0xff - * temp = (sampler << 8) & 0xf00 - * addr = addr | temp - */ - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); - brw_OR(p, addr, addr, temp); + /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ + brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); From 7f62fdae1629d75dd581d1c57b28c2f099c5ef6b Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Fri, 29 May 2015 13:41:48 +0100 Subject: [PATCH 381/834] i965: Don't add base_binding_table_index if it's zero When calculating the binding table index for non-constant sampler array indexing it needs to add the base binding table index which is a constant within the generated code. Often this base is zero so we can avoid a redundant instruction in that case. It looks like nothing in shader-db is doing non-constant sampler array indexing so this patch doesn't make any difference but it might be worth having anyway. 
Reviewed-by: Matt Turner Reviewed-by: Chris Forbes Acked-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index ea46b1a1917..40a3db3040e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -789,7 +789,8 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); - brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + if (base_binding_table_index) + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 9699607ed28..ead620b3c00 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -408,7 +408,8 @@ vec4_generator::generate_tex(vec4_instruction *inst, /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); - brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + if (base_binding_table_index) + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); From 512117ce0e1b32b4e3086a638bf50b966a4724bc Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Fri, 29 May 2015 14:33:18 +0100 Subject: [PATCH 382/834] gallivm: Remove stub disassemblerSymbolLookupCB. It's incomplete -- it wasn't filling ReferenceType so it was causing garbage in the disassembly.
Furthermore it seems impossible to get the jump information through this interface. The solution for function size problem is to effectively book-keep the machine code start and end address while JIT'ing. --- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 64fb044868f..9a85248018c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -115,18 +115,6 @@ lp_debug_dump_value(LLVMValueRef value) } -static const char * -disassemblerSymbolLookupCB(void *DisInfo, - uint64_t ReferenceValue, - uint64_t *ReferenceType, - uint64_t ReferencePC, - const char **ReferenceName) -{ - // TODO: Maybe this can be used to guess jumps - return NULL; -} - - /* * Disassemble a function, using the LLVM MC disassembler. * @@ -149,7 +137,7 @@ disassemble(const void* func, llvm::raw_ostream & Out) */ std::string Triple = llvm::sys::getProcessTriple(); - LLVMDisasmContextRef D = LLVMCreateDisasm(Triple.c_str(), NULL, 0, NULL, &disassemblerSymbolLookupCB); + LLVMDisasmContextRef D = LLVMCreateDisasm(Triple.c_str(), NULL, 0, NULL, NULL); char outline[1024]; if (!D) { From ae5d6db924d304a6b4af4f802e4ca1e1e2f25489 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 May 2015 15:59:27 -0600 Subject: [PATCH 383/834] draw: silence unused var warnings for non-debug build Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_llvm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index b9e55af42c7..885c27c13c2 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -97,6 +97,7 @@ create_jit_dvbuffer_type(struct gallivm_state *gallivm, dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 
0); + (void) target; /* silence unused var warning for non-debug build */ LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map, target, dvbuffer_type, DRAW_JIT_DVBUFFER_MAP); @@ -133,6 +134,7 @@ create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name) texture_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + (void) target; /* silence unused var warning for non-debug build */ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, target, texture_type, DRAW_JIT_TEXTURE_WIDTH); @@ -290,6 +292,7 @@ create_gs_jit_context_type(struct gallivm_state *gallivm, context_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + (void) target; /* silence unused var warning for non-debug build */ LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants, target, context_type, DRAW_GS_JIT_CTX_CONSTANTS); LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, num_constants, @@ -353,6 +356,7 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm, vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + (void) target; /* silence unused var warning for non-debug build */ LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, target, vb_type, 0); LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, From 875918587156e139a82ac9ece9c1290b8019d007 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 May 2015 15:59:50 -0600 Subject: [PATCH 384/834] st/mesa: silence unused var warnings for non-debug build Reviewed-by: Jose Fonseca --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 719d08145ea..db190c943a4 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4209,6 +4209,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, st_dst_reg 
temp_dst = st_dst_reg(temp); assert(st->pixel_xfer.pixelmap_texture); + (void) st; /* With a little effort, we can do four pixel map look-ups with * two TEX instructions: From 71afc13eda53f36827f19e96404ff782561b523b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 May 2015 16:00:14 -0600 Subject: [PATCH 385/834] pipebuffer: silence unused var warnings for non-debug build Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 3bd9cd70ae3..fc81e11b972 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -376,6 +376,7 @@ fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr, /* TODO: remove consequents buffers with the same fence? */ assert(!destroyed); + (void) destroyed; /* silence unused var warning for non-debug build */ fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE; From 87813c504a8a72198a6a4e8de9e5905fd751ac2c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 May 2015 16:00:52 -0600 Subject: [PATCH 386/834] gallivm: silence unused var warnings for non-debug build Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 3c25c329edd..efe71704c3a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -405,6 +405,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, aligned, base_ptr, offset, TRUE); assert(format_desc->block.bits <= vec_len); + (void) vec_len; /* silence unused var warning for non-debug build */ packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, ""); return lp_build_format_swizzle_aos(format_desc, &bld, 
packed); From 54070a9d1db332853609a31e5da76126d8f9445a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 May 2015 16:01:37 -0600 Subject: [PATCH 387/834] egl/dri2: silence uninitialized variable warnings And update assertions to be more informative. Reviewed-by: Jose Fonseca --- src/egl/drivers/dri2/egl_dri2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index fe5cbc8815d..8b915ef54d0 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -1256,7 +1256,8 @@ dri2_bind_tex_image(_EGLDriver *drv, format = __DRI_TEXTURE_FORMAT_RGBA; break; default: - assert(0); + assert(!"Unexpected texture format in dri2_bind_tex_image()"); + format = __DRI_TEXTURE_FORMAT_RGBA; } switch (dri2_surf->base.TextureTarget) { @@ -1264,7 +1265,8 @@ dri2_bind_tex_image(_EGLDriver *drv, target = GL_TEXTURE_2D; break; default: - assert(0); + target = GL_TEXTURE_2D; + assert(!"Unexpected texture target in dri2_bind_tex_image()"); } (*dri2_dpy->tex_buffer->setTexBuffer2)(dri2_ctx->dri_context, From 51d08d55f46655715cb4a4ef5d14fb7d051b989a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 May 2015 16:02:44 -0600 Subject: [PATCH 388/834] gallium/util: silence unused var warnings for non-debug build Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/util/u_format_etc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/util/u_format_etc.c b/src/gallium/auxiliary/util/u_format_etc.c index f909b16081a..63e03ff5cc2 100644 --- a/src/gallium/auxiliary/util/u_format_etc.c +++ b/src/gallium/auxiliary/util/u_format_etc.c @@ -65,11 +65,10 @@ util_format_etc1_rgb8_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, con void util_format_etc1_rgb8_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) { - const unsigned bw = 4, bh = 4; struct etc1_block block; uint8_t tmp[3]; - assert(i < bw && j < bh); +
assert(i < 4 && j < 4); /* check i, j against 4x4 block size */ etc1_parse_block(&block, src); etc1_fetch_texel(&block, i, j, tmp); From f97166e550f17f69f8de7e51775e745a5218d3e7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 1 Jun 2015 07:40:34 -0600 Subject: [PATCH 389/834] docs: update GL_ARB_copy_image, GL_ARB_clear_texture gallium status VMware is working on these. Signed-off-by: Brian Paul --- docs/GL3.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 8e1c8cd4eef..f2d06f17360 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -153,7 +153,7 @@ GL 4.3, GLSL 4.30: GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30) GL_ARB_clear_buffer_object DONE (all drivers) GL_ARB_compute_shader in progress (jljusten) - GL_ARB_copy_image DONE (i965) + GL_ARB_copy_image DONE (i965) (gallium - in progress, VMware) GL_KHR_debug DONE (all drivers) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, llvmpipe) @@ -177,7 +177,7 @@ GL 4.4, GLSL 4.40: GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers) GL_ARB_buffer_storage DONE (i965, nv50, nvc0, r600, radeonsi) - GL_ARB_clear_texture DONE (i965) + GL_ARB_clear_texture DONE (i965) (gallium - in progress, VMware) GL_ARB_enhanced_layouts not started GL_ARB_multi_bind DONE (all drivers) GL_ARB_query_buffer_object not started From 9945573d65f4f66d127df7cbb62648889d09a7ed Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sun, 10 May 2015 23:33:30 -0700 Subject: [PATCH 390/834] i965: Drop "Vector Mask Enable" bit from 3DSTATE_GS on Gen8+. The documentation makes it pretty clear that we shouldn't use this: "Under normal conditions SW shall specify DMask, as the GS stage will provide a Dispatch Mask appropriate to SIMD4x2 or SIMD8 thread execution (as a function of dispatch mode). 
E.g., for SIMD4x2 execution, the GS stage will generate a Dispatch Mask that is equal to what the EU would use as the Vector Mask. For SIMD8 execution there is no known usage model for use of Vector Mask (as there is for PS shaders)." I also managed to find descriptions of DMask and VMask, in the "State Register" (sr0.2/3) field descriptions: "Dispatch Mask (DMask). This 32-bit field specifies which channels are active at Dispatch time." "Vector Mask (VMask). This 32-bit field contains, for each 4-bit group, the OR of the corresponding 4-bit group in the dispatch mask." SIMD4x2 shaders process one or two vec4 values, with each 4-bit group corresponding to xyzw channel enables (either all on, or all off). Thus, DMask = VMask in SIMD4x2 mode. But in SIMD8 mode, 4-bit groups are meaningless, so it just messes up your values. Signed-off-by: Kenneth Graunke Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/gen8_gs_state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 6a0e215eca3..0763e910561 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -48,8 +48,7 @@ gen8_upload_gs_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2)); OUT_BATCH(stage_state->prog_offset); OUT_BATCH(0); - OUT_BATCH(GEN6_GS_VECTOR_MASK_ENABLE | - brw->geometry_program->VerticesIn | + OUT_BATCH(brw->geometry_program->VerticesIn | ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT) | ((prog_data->base.binding_table.size_bytes / 4) << From 0f8ec779ddff4126837a7d4216ecf1d4b97e93d2 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 11 Mar 2015 21:18:42 -0700 Subject: [PATCH 391/834] i965: Create a shader_dispatch_mode enum to replace VS/GS fields. We used to store the GS dispatch mode in brw_gs_prog_data while separately storing the VS dispatch mode in brw_vue_prog_data::simd8. 
This patch introduces an enum to represent all possible dispatch modes, and stores it in brw_vue_prog_data::dispatch_mode, unifying the two. Based on a suggestion by Matt Turner. Signed-off-by: Kenneth Graunke Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_context.h | 16 +++++++--------- src/mesa/drivers/dri/i965/brw_defines.h | 5 ++--- src/mesa/drivers/dri/i965/brw_vec4.cpp | 5 ++++- .../drivers/dri/i965/brw_vec4_gs_visitor.cpp | 8 ++++---- src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen7_gs_state.c | 2 +- src/mesa/drivers/dri/i965/gen8_gs_state.c | 3 ++- src/mesa/drivers/dri/i965/gen8_vs_state.c | 3 ++- 8 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index abc11f63230..01c4283c8f0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -605,6 +605,12 @@ struct brw_ff_gs_prog_data { unsigned svbi_postincrement_value; }; +enum shader_dispatch_mode { + DISPATCH_MODE_4X1_SINGLE = 0, + DISPATCH_MODE_4X2_DUAL_INSTANCE = 1, + DISPATCH_MODE_4X2_DUAL_OBJECT = 2, + DISPATCH_MODE_SIMD8 = 3, +}; /* Note: brw_vue_prog_data_compare() must be updated when adding fields to * this struct! @@ -622,7 +628,7 @@ struct brw_vue_prog_data { */ GLuint urb_entry_size; - bool simd8; + enum shader_dispatch_mode dispatch_mode; }; @@ -719,14 +725,6 @@ struct brw_gs_prog_data int invocations; - /** - * Dispatch mode, can be any of: - * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT - * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE - * GEN7_GS_DISPATCH_MODE_SINGLE - */ - int dispatch_mode; - /** * Gen6 transform feedback enabled flag. 
*/ diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index dedc3811abe..f6da305254c 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1773,9 +1773,8 @@ enum brw_message_target { # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1 # define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20 # define GEN7_GS_INSTANCE_CONTROL_SHIFT 15 -# define GEN7_GS_DISPATCH_MODE_SINGLE (0 << 11) -# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 << 11) -# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 << 11) +# define GEN7_GS_DISPATCH_MODE_SHIFT 11 +# define GEN7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11) # define GEN6_GS_STATISTICS_ENABLE (1 << 10) # define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) # define GEN6_GS_RENDERING_ENABLE (1 << 8) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index a324798e060..5a9c3f53218 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1894,6 +1894,8 @@ brw_vs_emit(struct brw_context *brw, brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX); } + prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; + fs_visitor v(brw, mem_ctx, MESA_SHADER_VERTEX, &c->key, &prog_data->base.base, prog, &c->vp->program.Base, 8); if (!v.run_vs()) { @@ -1926,11 +1928,12 @@ brw_vs_emit(struct brw_context *brw, g.generate_code(v.cfg, 8); assembly = g.get_assembly(final_assembly_size); - prog_data->base.simd8 = true; c->base.last_scratch = v.last_scratch; } if (!assembly) { + prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; + vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx); if (!v.run()) { if (prog) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 363e30e34e4..eacb2f5be7b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -106,7 
+106,7 @@ vec4_gs_visitor::setup_payload() * to be interleaved, so one register contains two attribute slots. */ int attributes_per_reg = - c->prog_data.dispatch_mode == GEN7_GS_DISPATCH_MODE_DUAL_OBJECT ? 1 : 2; + c->prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2; /* If a geometry shader tries to read from an input that wasn't written by * the vertex shader, that produces undefined results, but it shouldn't @@ -655,7 +655,7 @@ brw_gs_emit(struct brw_context *brw, */ if (c->prog_data.invocations <= 1 && likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { - c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_OBJECT; + c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; vec4_gs_visitor v(brw, c, prog, mem_ctx, true /* no_spills */); if (v.run()) { @@ -690,9 +690,9 @@ brw_gs_emit(struct brw_context *brw, * SINGLE mode. */ if (c->prog_data.invocations <= 1 || brw->gen < 7) - c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_SINGLE; + c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE; else - c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE; + c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE; vec4_gs_visitor *gs = NULL; const unsigned *ret = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index f82a62b4851..b2f91bd412b 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -121,7 +121,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw) /* BRW_NEW_VS_PROG_DATA */ const struct brw_stage_prog_data *prog_data = &brw->vs.prog_data->base.base; - dword_pitch = brw->vs.prog_data->base.simd8; + dword_pitch = brw->vs.prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8; /* _NEW_PROGRAM_CONSTANTS */ brw_upload_pull_constants(brw, BRW_NEW_VS_CONSTBUF, &vp->program.Base, @@ -151,7 +151,7 @@ brw_upload_vs_ubo_surfaces(struct brw_context *brw) return; /* 
BRW_NEW_VS_PROG_DATA */ - dword_pitch = brw->vs.prog_data->base.simd8; + dword_pitch = brw->vs.prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8; brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX], &brw->vs.base, &brw->vs.prog_data->base.base, dword_pitch); diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index e1c4f8b5d14..8d6d3fe1d34 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -112,7 +112,7 @@ upload_gs_state(struct brw_context *brw) GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | ((brw->gs.prog_data->invocations - 1) << GEN7_GS_INSTANCE_CONTROL_SHIFT) | - brw->gs.prog_data->dispatch_mode | + SET_FIELD(prog_data->dispatch_mode, GEN7_GS_DISPATCH_MODE) | GEN6_GS_STATISTICS_ENABLE | (brw->gs.prog_data->include_primitive_id ? GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 0763e910561..26a02d3b045 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -76,7 +76,8 @@ gen8_upload_gs_state(struct brw_context *brw) uint32_t dw7 = (brw->gs.prog_data->control_data_header_size_hwords << GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | - brw->gs.prog_data->dispatch_mode | + SET_FIELD(prog_data->dispatch_mode, + GEN7_GS_DISPATCH_MODE) | ((brw->gs.prog_data->invocations - 1) << GEN7_GS_INSTANCE_CONTROL_SHIFT) | GEN6_GS_STATISTICS_ENABLE | diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index f92af55e37f..9bfde38c430 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -66,7 +66,8 @@ upload_vs_state(struct brw_context *brw) (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); - uint32_t simd8_enable = prog_data->simd8 ? 
GEN8_VS_SIMD8_ENABLE : 0; + uint32_t simd8_enable = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ? + GEN8_VS_SIMD8_ENABLE : 0; OUT_BATCH(((brw->max_vs_threads - 1) << HSW_VS_MAX_THREADS_SHIFT) | GEN6_VS_STATISTICS_ENABLE | simd8_enable | From 386bf336c400104fbc80bf8a21f745eca5771ec1 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 13 May 2015 14:45:45 -0700 Subject: [PATCH 392/834] i965: Use proper pitch for scalar GS pull constants and UBOs. See the corresponding code in brw_vs_surface_state.c. v2: const more things (requested by Topi Pohjolainen) Signed-off-by: Kenneth Graunke Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c index a323e4d9031..0b8bfc3d9bd 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c @@ -47,11 +47,12 @@ brw_upload_gs_pull_constants(struct brw_context *brw) return; /* BRW_NEW_GS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = &brw->gs.prog_data->base.base; + const struct brw_vue_prog_data *prog_data = &brw->gs.prog_data->base; + const bool dword_pitch = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8; /* _NEW_PROGRAM_CONSTANTS */ brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, &gp->program.Base, - stage_state, prog_data, false); + stage_state, &prog_data->base, dword_pitch); } const struct brw_tracked_state brw_gs_pull_constants = { @@ -77,8 +78,11 @@ brw_upload_gs_ubo_surfaces(struct brw_context *brw) return; /* BRW_NEW_GS_PROG_DATA */ + struct brw_vue_prog_data *prog_data = &brw->gs.prog_data->base; + bool dword_pitch = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8; + brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], - &brw->gs.base, &brw->gs.prog_data->base.base, false); + &brw->gs.base, &prog_data->base, dword_pitch); 
} const struct brw_tracked_state brw_gs_ubo_surfaces = { From a2655e0dd422599c07c572472855abd98d20d21a Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 29 May 2015 23:02:56 -0700 Subject: [PATCH 393/834] i965: Drop LOAD_PAYLOAD workaround in fs_visitor::emit_urb_writes(). Now that Jason's LOAD_PAYLOAD improvements have landed, we don't need this. Passing 1 for the number of header registers already takes care of setting force_writemask_all on the header copy. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index e336b73392c..8b9fedea0ab 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1980,22 +1980,14 @@ fs_visitor::emit_urb_writes() fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1); fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1), BRW_REGISTER_TYPE_F, dispatch_width); - - /* We need WE_all on the MOV for the message header (the URB handles) - * so do a MOV to a dummy register and set force_writemask_all on the - * MOV. LOAD_PAYLOAD will preserve that. 
- */ - fs_reg dummy = fs_reg(GRF, alloc.allocate(1), - BRW_REGISTER_TYPE_UD); - fs_inst *inst = emit(MOV(dummy, fs_reg(retype(brw_vec8_grf(1, 0), - BRW_REGISTER_TYPE_UD)))); - inst->force_writemask_all = true; - payload_sources[0] = dummy; + payload_sources[0] = + fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); memcpy(&payload_sources[1], sources, length * sizeof sources[0]); emit(LOAD_PAYLOAD(payload, payload_sources, length + 1, 1)); - inst = emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); + fs_inst *inst = + emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); inst->eot = last; inst->mlen = length + 1; inst->offset = urb_offset; From 762395736be3adcc810274e1e96acd4bdceb10c6 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 30 May 2015 11:19:28 -0700 Subject: [PATCH 394/834] i965: Add Gen8+ VS dispatch_mode assertion. Suggested by Ben Widawsky. Signed-off-by: Kenneth Graunke Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/gen8_vs_state.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index 9bfde38c430..28f5adddf14 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -39,6 +39,9 @@ upload_vs_state(struct brw_context *brw) /* BRW_NEW_VS_PROG_DATA */ const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base; + assert(prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 || + prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT); + if (prog_data->base.use_alt_mode) floating_point_mode = GEN6_VS_FLOATING_POINT_MODE_ALT; From 5b226a12420993a0f4aae2295b33aaa305242a3d Mon Sep 17 00:00:00 2001 From: Eduardo Lima Mitev Date: Tue, 2 Jun 2015 13:42:46 +0200 Subject: [PATCH 395/834] nir: prevent use-after-free condition in should_lower_phi() lower_phis_to_scalar() pass recurses the instruction dependence graph to determine if all the sources of a given instruction are scalarizable. 
To prevent cycles, it temporarily marks the phi instruction before recursing in, then updates the entry with the resulting value. However, it does not consider that the entry value may have changed after a recursion pass, hence causing a use-after-free situation and a crash. This patch fixes this by reloading the entry corresponding to the 'phi' after recursing and before updating its value. The crash can be reproduced ~20% of the time with the dEQP test: dEQP-GLES3.functional.shaders.loops.while_constant_iterations.nested_sequence_fragment Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_lower_phis_to_scalar.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c index 4bdb80072ab..a57d253975d 100644 --- a/src/glsl/nir/nir_lower_phis_to_scalar.c +++ b/src/glsl/nir/nir_lower_phis_to_scalar.c @@ -153,6 +153,11 @@ should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state) break; } + /* The hash table entry for 'phi' may have changed while recursing the + * dependence graph, so we need to reset it */ + entry = _mesa_hash_table_search(state->phi_table, phi); + assert(entry); + entry->data = (void *)(intptr_t)scalarizable; return scalarizable; From 576f7241b6ce0ae22aa52a3e91fb1ac913d4b7b2 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 1 Jun 2015 12:19:30 -0700 Subject: [PATCH 396/834] prog_to_nir: Remove from op_trans[] opcodes handled in the switch. 
Reviewed-by: Kenneth Graunke --- src/mesa/program/prog_to_nir.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 5ca81e54b7f..5e104e70f4a 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -697,7 +697,7 @@ static const nir_op op_trans[MAX_OPCODE] = { [OPCODE_ADD] = nir_op_fadd, [OPCODE_ARL] = 0, [OPCODE_CMP] = 0, - [OPCODE_COS] = nir_op_fcos, + [OPCODE_COS] = 0, [OPCODE_DDX] = nir_op_fddx, [OPCODE_DDY] = nir_op_fddy, [OPCODE_DP2] = 0, @@ -706,11 +706,11 @@ static const nir_op op_trans[MAX_OPCODE] = { [OPCODE_DPH] = 0, [OPCODE_DST] = 0, [OPCODE_END] = 0, - [OPCODE_EX2] = nir_op_fexp2, + [OPCODE_EX2] = 0, [OPCODE_EXP] = 0, [OPCODE_FLR] = nir_op_ffloor, [OPCODE_FRC] = nir_op_ffract, - [OPCODE_LG2] = nir_op_flog2, + [OPCODE_LG2] = 0, [OPCODE_LIT] = 0, [OPCODE_LOG] = 0, [OPCODE_LRP] = 0, @@ -719,15 +719,15 @@ static const nir_op op_trans[MAX_OPCODE] = { [OPCODE_MIN] = nir_op_fmin, [OPCODE_MOV] = nir_op_fmov, [OPCODE_MUL] = nir_op_fmul, - [OPCODE_POW] = nir_op_fpow, - [OPCODE_RCP] = nir_op_frcp, + [OPCODE_POW] = 0, + [OPCODE_RCP] = 0, - [OPCODE_RSQ] = nir_op_frsq, + [OPCODE_RSQ] = 0, [OPCODE_SCS] = 0, [OPCODE_SEQ] = 0, [OPCODE_SGE] = 0, [OPCODE_SGT] = 0, - [OPCODE_SIN] = nir_op_fsin, + [OPCODE_SIN] = 0, [OPCODE_SLE] = 0, [OPCODE_SLT] = 0, [OPCODE_SNE] = 0, From 5da809d70fb50eb4b290ee7cbe1b8f09e9286f4e Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 1 Jun 2015 12:22:54 -0700 Subject: [PATCH 397/834] prog_to_nir: Remove OPCODE_MOV special case. OPCODE_MOV is in the op_trans[] array. 
Reviewed-by: Kenneth Graunke --- src/mesa/program/prog_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 5e104e70f4a..d6f165e5060 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -891,7 +891,7 @@ ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst) break; default: - if (op_trans[op] != 0 || op == OPCODE_MOV) { + if (op_trans[op] != 0) { ptn_alu(b, op_trans[op], dest, src); } else { fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op)); From 5f7b8fa4811ae0acb49de5d0ef44ae1573eb5ccc Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Mon, 1 Jun 2015 08:00:14 +1000 Subject: [PATCH 398/834] nir: remove extra semicolon Reviewed-by: Thomas Helland Reviewed-by: Jason Ekstrand Reviewed-by: Connor Abbott --- src/glsl/nir/nir_lower_atomics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c index f6f89020f78..0457de60d9a 100644 --- a/src/glsl/nir/nir_lower_atomics.c +++ b/src/glsl/nir/nir_lower_atomics.c @@ -109,7 +109,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) } new_instr->src[0].is_ssa = true; - new_instr->src[0].ssa = offset_def;; + new_instr->src[0].ssa = offset_def; if (instr->dest.is_ssa) { nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, From 86a74e9b6b8953a55de234f185a14defd646f489 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Tue, 2 Jun 2015 19:26:42 +1000 Subject: [PATCH 399/834] nir: use src for ssa helper Reviewed-by: Thomas Helland Reviewed-by: Connor Abbott --- src/glsl/nir/glsl_to_nir.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index af758ceb020..ad8cfad627b 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -930,13 +930,9 @@ 
nir_visitor::evaluate_rvalue(ir_rvalue* ir) } nir_dest *dest = get_instr_dest(this->result); - assert(dest->is_ssa); - nir_src src = NIR_SRC_INIT; - src.is_ssa = true; - src.ssa = &dest->ssa; - return src; + return nir_src_for_ssa(&dest->ssa); } nir_alu_instr * From 6139195606d97b43a739500627c906baf804fab0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 May 2015 11:22:25 -0600 Subject: [PATCH 400/834] mesa: fix glPushAttrib(0) / glPopAttrib() error If the glPushAttrib() mask value was zero we didn't actually push anything onto the attribute stack. A subsequent glPopAttrib() call would generate a GL_STACK_UNDERFLOW error. Now push a dummy attribute in that case to prevent the error. Mesa now matches nvidia's behavior. Reviewed-by: Jose Fonseca --- src/mesa/main/attrib.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index b163c0aa699..365a79ded87 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -177,6 +177,10 @@ struct texture_state }; +/** An unused GL_*_BIT value */ +#define DUMMY_BIT 0x10000000 + + /** * Allocate new attribute node of given type/kind. Attach payload data. * Insert it into the linked list named by 'head'. @@ -253,6 +257,15 @@ _mesa_PushAttrib(GLbitfield mask) /* groups specified by the mask. */ head = NULL; + if (mask == 0) { + /* if mask is zero we still need to push something so that we + * don't get a GL_STACK_UNDERFLOW error in glPopAttrib(). 
+ */ + GLuint dummy = 0; + if (!push_attrib(ctx, &head, DUMMY_BIT, sizeof(dummy), &dummy)) + goto end; + } + if (mask & GL_ACCUM_BUFFER_BIT) { if (!push_attrib(ctx, &head, GL_ACCUM_BUFFER_BIT, sizeof(struct gl_accum_attrib), @@ -928,6 +941,10 @@ _mesa_PopAttrib(void) } switch (attr->kind) { + case DUMMY_BIT: + /* do nothing */ + break; + case GL_ACCUM_BUFFER_BIT: { const struct gl_accum_attrib *accum; From 4dd72fe70d6800525302c734d161be411ed9f7e0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 May 2015 12:09:54 -0600 Subject: [PATCH 401/834] mesa: restore GL_EXT_depth_bounds_test state in glPopAttrib() Spotted by inspection. Untested (no piglit test). Signed-off-by: Brian Paul --- src/mesa/main/attrib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index 365a79ded87..53626e38be9 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -1091,6 +1091,11 @@ _mesa_PopAttrib(void) _mesa_ClearDepth(depth->Clear); _mesa_set_enable(ctx, GL_DEPTH_TEST, depth->Test); _mesa_DepthMask(depth->Mask); + if (ctx->Extensions.EXT_depth_bounds_test) { + _mesa_set_enable(ctx, GL_DEPTH_BOUNDS_TEST_EXT, + depth->BoundsTest); + _mesa_DepthBoundsEXT(depth->BoundsMin, depth->BoundsMax); + } } break; case GL_ENABLE_BIT: From 56b2b3d385170ab33934ec71fd9d0a6e0e1af9a8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 May 2015 12:12:04 -0600 Subject: [PATCH 402/834] mesa: move no-change glDepthFunc check earlier If the incoming func matches the current state it must be a legal value so we can do this before the switch statement. 
Signed-off-by: Brian Paul --- src/mesa/main/depth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/depth.c b/src/mesa/main/depth.c index 29851ecb8a4..bb4591cf152 100644 --- a/src/mesa/main/depth.c +++ b/src/mesa/main/depth.c @@ -65,6 +65,9 @@ _mesa_DepthFunc( GLenum func ) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glDepthFunc %s\n", _mesa_lookup_enum_by_nr(func)); + if (ctx->Depth.Func == func) + return; + switch (func) { case GL_LESS: /* (default) pass if incoming z < stored z */ case GL_GEQUAL: @@ -80,9 +83,6 @@ _mesa_DepthFunc( GLenum func ) return; } - if (ctx->Depth.Func == func) - return; - FLUSH_VERTICES(ctx, _NEW_DEPTH); ctx->Depth.Func = func; From ef3f89e53e76332ddb300b08f4698347e17d1633 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 1 Jun 2015 16:22:55 -0700 Subject: [PATCH 403/834] program: Shrink and rename SaturateMode field to Saturate. It was 2 bits to accommodate SATURATE_PLUS_MINUS_ONE (removed by commit 09b566e1). A similar change was made to TGSI recently in commit e1c4e8aa. Reducing the size from 2 bits to 1 reduces the size of the bit fields from 17 bits to 16, which is a much nicer number. 
Reviewed-by: Brian Paul --- src/mesa/drivers/dri/i915/i915_fragprog.c | 2 +- src/mesa/program/ir_to_mesa.cpp | 2 +- src/mesa/program/prog_execute.c | 2 +- src/mesa/program/prog_instruction.c | 2 +- src/mesa/program/prog_instruction.h | 16 ++-------------- src/mesa/program/prog_optimize.c | 4 ++-- src/mesa/program/prog_print.c | 6 +++--- src/mesa/program/prog_to_nir.c | 4 ++-- src/mesa/program/program_parse.y | 2 +- src/mesa/program/program_parse_extra.c | 4 ++-- src/mesa/program/programopt.c | 6 +++--- src/mesa/state_tracker/st_mesa_to_tgsi.c | 2 +- 12 files changed, 20 insertions(+), 32 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 9b002236add..03c32e56d82 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -220,7 +220,7 @@ get_result_flags(const struct prog_instruction *inst) { GLuint flags = 0; - if (inst->SaturateMode == SATURATE_ZERO_ONE) + if (inst->Saturate) flags |= A0_DEST_SATURATE; if (inst->DstReg.WriteMask & WRITEMASK_X) flags |= A0_DEST_CHANNEL_X; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 3dcb53702a5..02a05687683 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2720,7 +2720,7 @@ get_mesa_program(struct gl_context *ctx, mesa_inst->Opcode = inst->op; mesa_inst->CondUpdate = inst->cond_update; if (inst->saturate) - mesa_inst->SaturateMode = SATURATE_ZERO_ONE; + mesa_inst->Saturate = GL_TRUE; mesa_inst->DstReg.File = inst->dst.file; mesa_inst->DstReg.Index = inst->dst.index; mesa_inst->DstReg.CondMask = inst->dst.cond_mask; diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index 16e8e340d8d..46260b54882 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -397,7 +397,7 @@ store_vector4(const struct prog_instruction *inst, struct gl_program_machine *machine, const GLfloat value[4]) { const 
struct prog_dst_register *dstReg = &(inst->DstReg); - const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE; + const GLboolean clamp = inst->Saturate; GLuint writeMask = dstReg->WriteMask; GLfloat clampedValue[4]; GLfloat *dst = get_dst_register_pointer(dstReg, machine); diff --git a/src/mesa/program/prog_instruction.c b/src/mesa/program/prog_instruction.c index f9ebe4e8fd2..a8fb9d9b04e 100644 --- a/src/mesa/program/prog_instruction.c +++ b/src/mesa/program/prog_instruction.c @@ -55,7 +55,7 @@ _mesa_init_instructions(struct prog_instruction *inst, GLuint count) inst[i].DstReg.CondMask = COND_TR; inst[i].DstReg.CondSwizzle = SWIZZLE_NOOP; - inst[i].SaturateMode = SATURATE_OFF; + inst[i].Saturate = GL_FALSE; inst[i].Precision = FLOAT32; } } diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h index 96da198f86d..3ada85750a5 100644 --- a/src/mesa/program/prog_instruction.h +++ b/src/mesa/program/prog_instruction.h @@ -117,15 +117,6 @@ /*@}*/ -/** - * Saturation modes when storing values. - */ -/*@{*/ -#define SATURATE_OFF 0 -#define SATURATE_ZERO_ONE 1 -/*@}*/ - - /** * Per-component negation masks */ @@ -327,15 +318,12 @@ struct prog_instruction GLuint CondDst:1; /** - * Saturate each value of the vectored result to the range [0,1] or the - * range [-1,1]. \c SSAT mode (i.e., saturation to the range [-1,1]) is - * only available in NV_fragment_program2 mode. - * Value is one of the SATURATE_* tokens. + * Saturate each value of the vectored result to the range [0,1]. * * \since * NV_fragment_program_option, NV_vertex_program3. */ - GLuint SaturateMode:2; + GLuint Saturate:1; /** * Per-instruction selectable precision: FLOAT32, FLOAT16, FIXED12. 
diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 6d4485acb65..f9e9035fc3e 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -478,7 +478,7 @@ can_upward_mov_be_modifed(const struct prog_instruction *mov) return can_downward_mov_be_modifed(mov) && mov->DstReg.File == PROGRAM_TEMPORARY && - mov->SaturateMode == SATURATE_OFF; + !mov->Saturate; } @@ -653,7 +653,7 @@ _mesa_merge_mov_into_inst(struct prog_instruction *inst, if (mask != (inst->DstReg.WriteMask & mask)) return GL_FALSE; - inst->SaturateMode |= mov->SaturateMode; + inst->Saturate |= mov->Saturate; /* Depending on the instruction, we may need to recompute the swizzles. * Also, some other instructions (like TEX) are not linear. We will only diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index d588d07ffe4..e360f09b72c 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -600,7 +600,7 @@ _mesa_fprint_alu_instruction(FILE *f, fprintf(f, ".C"); /* frag prog only */ - if (inst->SaturateMode == SATURATE_ZERO_ONE) + if (inst->Saturate) fprintf(f, "_SAT"); fprintf(f, " "); @@ -658,7 +658,7 @@ _mesa_fprint_instruction_opt(FILE *f, switch (inst->Opcode) { case OPCODE_SWZ: fprintf(f, "SWZ"); - if (inst->SaturateMode == SATURATE_ZERO_ONE) + if (inst->Saturate) fprintf(f, "_SAT"); fprintf(f, " "); fprint_dst_reg(f, &inst->DstReg, mode, prog); @@ -675,7 +675,7 @@ _mesa_fprint_instruction_opt(FILE *f, case OPCODE_TXB: case OPCODE_TXD: fprintf(f, "%s", _mesa_opcode_string(inst->Opcode)); - if (inst->SaturateMode == SATURATE_ZERO_ONE) + if (inst->Saturate) fprintf(f, "_SAT"); fprintf(f, " "); fprint_dst_reg(f, &inst->DstReg, mode, prog); diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index d6f165e5060..e986e511514 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -900,8 +900,8 @@ ptn_emit_instruction(struct ptn_compile *c, struct 
prog_instruction *prog_inst) break; } - if (prog_inst->SaturateMode) { - assert(prog_inst->SaturateMode == SATURATE_ZERO_ONE); + if (prog_inst->Saturate) { + assert(prog_inst->Saturate); assert(!dest.dest.is_ssa); ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest))); } diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y index 716b83d2d07..1319f29d30a 100644 --- a/src/mesa/program/program_parse.y +++ b/src/mesa/program/program_parse.y @@ -2308,7 +2308,7 @@ asm_instruction_copy_ctor(const struct prog_instruction *base, inst->Base.Opcode = base->Opcode; inst->Base.CondUpdate = base->CondUpdate; inst->Base.CondDst = base->CondDst; - inst->Base.SaturateMode = base->SaturateMode; + inst->Base.Saturate = base->Saturate; inst->Base.Precision = base->Precision; asm_instruction_set_operands(inst, dst, src0, src1, src2); diff --git a/src/mesa/program/program_parse_extra.c b/src/mesa/program/program_parse_extra.c index a9e36404580..32b54afc57b 100644 --- a/src/mesa/program/program_parse_extra.c +++ b/src/mesa/program/program_parse_extra.c @@ -40,7 +40,7 @@ _mesa_parse_instruction_suffix(const struct asm_parser_state *state, { inst->CondUpdate = 0; inst->CondDst = 0; - inst->SaturateMode = SATURATE_OFF; + inst->Saturate = GL_FALSE; inst->Precision = FLOAT32; @@ -82,7 +82,7 @@ _mesa_parse_instruction_suffix(const struct asm_parser_state *state, */ if (state->mode == ARB_fragment) { if (strcmp(suffix, "_SAT") == 0) { - inst->SaturateMode = SATURATE_ZERO_ONE; + inst->Saturate = GL_TRUE; suffix += 4; } } diff --git a/src/mesa/program/programopt.c b/src/mesa/program/programopt.c index e82c68a5305..af78150d594 100644 --- a/src/mesa/program/programopt.c +++ b/src/mesa/program/programopt.c @@ -305,7 +305,7 @@ _mesa_append_fog_code(struct gl_context *ctx, /* change the instruction to write to colorTemp w/ clamping */ inst->DstReg.File = PROGRAM_TEMPORARY; inst->DstReg.Index = colorTemp; - inst->SaturateMode = saturate; + inst->Saturate = saturate; 
/* don't break (may be several writes to result.color) */ } inst++; @@ -331,7 +331,7 @@ _mesa_append_fog_code(struct gl_context *ctx, inst->SrcReg[2].File = PROGRAM_STATE_VAR; inst->SrcReg[2].Index = fogPRefOpt; inst->SrcReg[2].Swizzle = SWIZZLE_YYYY; - inst->SaturateMode = SATURATE_ZERO_ONE; + inst->Saturate = GL_TRUE; inst++; } else { @@ -374,7 +374,7 @@ _mesa_append_fog_code(struct gl_context *ctx, inst->SrcReg[0].Index = fogFactorTemp; inst->SrcReg[0].Negate = NEGATE_XYZW; inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; - inst->SaturateMode = SATURATE_ZERO_ONE; + inst->Saturate = GL_TRUE; inst++; } /* LRP result.color.xyz, fogFactorTemp.xxxx, colorTemp, fogColorRef; */ diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 98d525c86c2..a88d7a87ff4 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -665,7 +665,7 @@ compile_instruction( if (num_dst) dst[0] = translate_dst( t, &inst->DstReg, - inst->SaturateMode, + inst->Saturate, clamp_dst_color_output); for (i = 0; i < num_src; i++) From fb011d31578ada40c2755314db783522477d0ad4 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 1 Jun 2015 16:27:46 -0700 Subject: [PATCH 404/834] program: Remove dead Aux field from prog_instruction. Appears to have been last used by the i965 driver (removed by commit 098acf6c). 
Reviewed-by: Brian Paul --- src/mesa/program/prog_instruction.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h index 3ada85750a5..3518f6c69b5 100644 --- a/src/mesa/program/prog_instruction.h +++ b/src/mesa/program/prog_instruction.h @@ -356,9 +356,6 @@ struct prog_instruction */ GLint BranchTarget; - /** for driver use (try to remove someday) */ - GLint Aux; - /** for debugging purposes */ const char *Comment; }; From 54a70a8ef20a9a875f0828acb42332cf69217ff5 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 1 Jun 2015 16:46:29 -0700 Subject: [PATCH 405/834] program: Replace gl_inst_opcode with enum prog_opcode. Both were introduced at the same time. I'm not sure why we needed two. Reviewed-by: Brian Paul --- src/mesa/program/ir_to_mesa.cpp | 2 +- src/mesa/program/prog_instruction.c | 10 +++++----- src/mesa/program/prog_instruction.h | 14 +++++++------- src/mesa/program/program_parse.y | 6 +++--- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 02a05687683..37597247904 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -405,7 +405,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir, dst_reg dst, src_reg src0, src_reg src1, unsigned elements) { - static const gl_inst_opcode dot_opcodes[] = { + static const enum prog_opcode dot_opcodes[] = { OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 }; diff --git a/src/mesa/program/prog_instruction.c b/src/mesa/program/prog_instruction.c index a8fb9d9b04e..21ef35337f6 100644 --- a/src/mesa/program/prog_instruction.c +++ b/src/mesa/program/prog_instruction.c @@ -114,7 +114,7 @@ _mesa_free_instructions(struct prog_instruction *inst, GLuint count) */ struct instruction_info { - gl_inst_opcode Opcode; + enum prog_opcode Opcode; const char *Name; GLuint NumSrcRegs; GLuint NumDstRegs; @@ -198,7 +198,7 @@ static const struct instruction_info 
InstInfo[MAX_OPCODE] = { * Return the number of src registers for the given instruction/opcode. */ GLuint -_mesa_num_inst_src_regs(gl_inst_opcode opcode) +_mesa_num_inst_src_regs(enum prog_opcode opcode) { assert(opcode < MAX_OPCODE); assert(opcode == InstInfo[opcode].Opcode); @@ -211,7 +211,7 @@ _mesa_num_inst_src_regs(gl_inst_opcode opcode) * Return the number of dst registers for the given instruction/opcode. */ GLuint -_mesa_num_inst_dst_regs(gl_inst_opcode opcode) +_mesa_num_inst_dst_regs(enum prog_opcode opcode) { assert(opcode < MAX_OPCODE); assert(opcode == InstInfo[opcode].Opcode); @@ -221,7 +221,7 @@ _mesa_num_inst_dst_regs(gl_inst_opcode opcode) GLboolean -_mesa_is_tex_instruction(gl_inst_opcode opcode) +_mesa_is_tex_instruction(enum prog_opcode opcode) { return (opcode == OPCODE_TEX || opcode == OPCODE_TXB || @@ -285,7 +285,7 @@ _mesa_check_soa_dependencies(const struct prog_instruction *inst) * Return string name for given program opcode. */ const char * -_mesa_opcode_string(gl_inst_opcode opcode) +_mesa_opcode_string(enum prog_opcode opcode) { if (opcode < MAX_OPCODE) return InstInfo[opcode].Name; diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h index 3518f6c69b5..d56f96cfaa1 100644 --- a/src/mesa/program/prog_instruction.h +++ b/src/mesa/program/prog_instruction.h @@ -134,7 +134,7 @@ /** * Program instruction opcodes for vertex, fragment and geometry programs. 
*/ -typedef enum prog_opcode { +enum prog_opcode { /* ARB_vp ARB_fp NV_vp NV_fp GLSL */ /*------------------------------------------*/ OPCODE_NOP = 0, /* X */ @@ -204,7 +204,7 @@ typedef enum prog_opcode { OPCODE_TRUNC, /* X */ OPCODE_XPD, /* X X */ MAX_OPCODE -} gl_inst_opcode; +}; /** @@ -291,7 +291,7 @@ struct prog_dst_register */ struct prog_instruction { - gl_inst_opcode Opcode; + enum prog_opcode Opcode; struct prog_src_register SrcReg[3]; struct prog_dst_register DstReg; @@ -379,19 +379,19 @@ extern void _mesa_free_instructions(struct prog_instruction *inst, GLuint count); extern GLuint -_mesa_num_inst_src_regs(gl_inst_opcode opcode); +_mesa_num_inst_src_regs(enum prog_opcode opcode); extern GLuint -_mesa_num_inst_dst_regs(gl_inst_opcode opcode); +_mesa_num_inst_dst_regs(enum prog_opcode opcode); extern GLboolean -_mesa_is_tex_instruction(gl_inst_opcode opcode); +_mesa_is_tex_instruction(enum prog_opcode opcode); extern GLboolean _mesa_check_soa_dependencies(const struct prog_instruction *inst); extern const char * -_mesa_opcode_string(gl_inst_opcode opcode); +_mesa_opcode_string(enum prog_opcode opcode); #ifdef __cplusplus diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y index 1319f29d30a..635f5d09d60 100644 --- a/src/mesa/program/program_parse.y +++ b/src/mesa/program/program_parse.y @@ -84,7 +84,7 @@ static void asm_instruction_set_operands(struct asm_instruction *inst, const struct prog_dst_register *dst, const struct asm_src_register *src0, const struct asm_src_register *src1, const struct asm_src_register *src2); -static struct asm_instruction *asm_instruction_ctor(gl_inst_opcode op, +static struct asm_instruction *asm_instruction_ctor(enum prog_opcode op, const struct prog_dst_register *dst, const struct asm_src_register *src0, const struct asm_src_register *src1, const struct asm_src_register *src2); @@ -139,7 +139,7 @@ static struct asm_instruction *asm_instruction_copy_ctor( gl_state_index state[STATE_LENGTH]; int 
negate; struct asm_vector vector; - gl_inst_opcode opcode; + enum prog_opcode opcode; struct { unsigned swz; @@ -2275,7 +2275,7 @@ asm_instruction_set_operands(struct asm_instruction *inst, struct asm_instruction * -asm_instruction_ctor(gl_inst_opcode op, +asm_instruction_ctor(enum prog_opcode op, const struct prog_dst_register *dst, const struct asm_src_register *src0, const struct asm_src_register *src1, From d46d04529b9c1e55b4c3b65a7078bbbd7ab1a810 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 2 Jun 2015 17:46:38 -0700 Subject: [PATCH 406/834] i965: Use UW-typed immediate in multiply inst. Some hardware reads only the low 16-bits even if the type is UD, but other hardware like Cherryview can't handle this. Fixes spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple on Cherryview. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90830 Reviewed-by: Neil Roberts Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 40a3db3040e..ff05b2a35ab 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -788,7 +788,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src brw_set_default_access_mode(p, BRW_ALIGN_1); /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ - brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); + brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); if (base_binding_table_index) brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index ead620b3c00..67495d2d76e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -407,7 +407,7 @@ vec4_generator::generate_tex(vec4_instruction *inst, brw_set_default_access_mode(p, BRW_ALIGN_1); /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ - brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); + brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); if (base_binding_table_index) brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); From 4fd42a7c2798d03476c84b79cb855984a15c222c Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Wed, 3 Jun 2015 01:34:06 +0200 Subject: [PATCH 407/834] llvmpipe: Implement stencil export Pretty trivial, fixes the issue that we're expected to be able to blit stencil surfaces (as the blit just relies on util blitter code which needs stencil export to do it). 2 piglits skip->pass, 11 fail->pass v2: prettify, keep different stencil ref value handling out of depth/stencil test itself. Reviewed-by: Jose Fonseca Reviewed-by: Dave Airlie --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 4 ---- src/gallium/drivers/llvmpipe/lp_screen.c | 2 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 25 ++++++++++++++++----- src/gallium/drivers/llvmpipe/lp_surface.c | 5 ----- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index b6c32ffb979..b25e0413750 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -975,10 +975,6 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, s_bld.int_vec_type, ""); } - /* convert scalar stencil refs into vectors */ - stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); - stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); - s_pass_mask = lp_build_stencil_test(&s_bld, stencil, stencil_refs, stencil_vals, front_facing); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c index 09ac9af50ec..47f1897c732 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -165,7 +165,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DEPTH_CLIP_DISABLE: return 1; case PIPE_CAP_SHADER_STENCIL_EXPORT: - return 0; + return 1; case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_START_INSTANCE: diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 35fe7b20181..b5ce8683f1a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -260,7 +260,8 @@ generate_fs_loop(struct gallivm_state *gallivm, { const struct util_format_description *zs_format_desc = NULL; const struct tgsi_token *tokens = shader->base.tokens; - LLVMTypeRef vec_type; + struct lp_type int_type = lp_int_type(type); + LLVMTypeRef vec_type, int_vec_type; LLVMValueRef mask_ptr, mask_val; LLVMValueRef consts_ptr, num_consts_ptr; LLVMValueRef z; @@ -295,7 +296,7 @@ generate_fs_loop(struct gallivm_state *gallivm, zs_format_desc = util_format_description(key->zsbuf_format); assert(zs_format_desc); - if (!shader->info.base.writes_z) { + if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) { if (key->alpha.enabled || key->blend.alpha_to_coverage || shader->info.base.uses_kill) { @@ -329,11 +330,14 @@ generate_fs_loop(struct gallivm_state *gallivm, depth_mode = 0; } + vec_type = lp_build_vec_type(gallivm, type); + int_vec_type = lp_build_vec_type(gallivm, int_type); stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr); stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr); - - vec_type = lp_build_vec_type(gallivm, type); + /* convert scalar stencil refs into vectors */ + stencil_refs[0] = lp_build_broadcast(gallivm, int_vec_type, stencil_refs[0]); + stencil_refs[1] = 
lp_build_broadcast(gallivm, int_vec_type, stencil_refs[1]); consts_ptr = lp_jit_context_constants(gallivm, context_ptr); num_consts_ptr = lp_jit_context_num_constants(gallivm, context_ptr); @@ -462,7 +466,9 @@ generate_fs_loop(struct gallivm_state *gallivm, int pos0 = find_output_by_semantic(&shader->info.base, TGSI_SEMANTIC_POSITION, 0); - + int s_out = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_STENCIL, + 0); if (pos0 != -1 && outputs[pos0][2]) { z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); @@ -512,6 +518,15 @@ generate_fs_loop(struct gallivm_state *gallivm, } } + if (s_out != -1 && outputs[s_out][1]) { + /* there's only one value, and spec says to discard additional bits */ + LLVMValueRef s_max_mask = lp_build_const_int_vec(gallivm, int_type, 255); + stencil_refs[0] = LLVMBuildLoad(builder, outputs[s_out][1], "output.s"); + stencil_refs[0] = LLVMBuildBitCast(builder, stencil_refs[0], int_vec_type, ""); + stencil_refs[0] = LLVMBuildAnd(builder, stencil_refs[0], s_max_mask, ""); + stencil_refs[1] = stencil_refs[0]; + } + lp_build_depth_stencil_load_swizzled(gallivm, type, zs_format_desc, key->resource_1d, depth_ptr, depth_stride, diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 08f968f7f0a..b985877e43e 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -139,11 +139,6 @@ static void lp_blit(struct pipe_context *pipe, return; /* done */ } - if (info.mask & PIPE_MASK_S) { - debug_printf("llvmpipe: cannot blit stencil, skipping\n"); - info.mask &= ~PIPE_MASK_S; - } - if (!util_blitter_is_blit_supported(lp->blitter, &info)) { debug_printf("llvmpipe: blit unsupported %s -> %s\n", util_format_short_name(info.src.resource->format), From 87a4bc511811327a00f9bbc1b6870b7fa46675f7 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Thu, 21 May 2015 15:51:09 +0300 Subject: [PATCH 408/834] mesa: reference built-in uniforms into 
gl_uniform_storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change introduces a new field in gl_uniform_storage to explicitly say that a uniform is built-in. In the case where it is, no storage is defined to make it clear that it is read-only from the mesa side. I fixed all the places in the code that made use of the structure that I changed. Any place making a wrong assumption and using the storage straight away will just crash. This patch seems to implement the path of least resistance towards listing built-in uniforms in GL_ACTIVE_UNIFORM (and other APIs). Reviewed-by: Tapani Pälli Signed-off-by: Martin Peres --- src/glsl/ir_uniform.h | 5 ++ src/glsl/link_uniform_initializers.cpp | 4 +- src/glsl/link_uniforms.cpp | 55 +++++++++---------- src/glsl/linker.cpp | 6 +- src/glsl/standalone_scaffolding.cpp | 2 +- .../tests/set_uniform_initializer_tests.cpp | 4 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 5 +- .../drivers/dri/i965/brw_vec4_visitor.cpp | 5 +- src/mesa/main/mtypes.h | 2 +- src/mesa/main/shaderapi.c | 4 +- src/mesa/main/shaderobj.c | 4 +- src/mesa/main/uniform_query.cpp | 15 ++++- src/mesa/program/ir_to_mesa.cpp | 9 ++- src/mesa/state_tracker/st_draw.c | 2 +- 14 files changed, 73 insertions(+), 49 deletions(-) diff --git a/src/glsl/ir_uniform.h b/src/glsl/ir_uniform.h index 21b5d05c11a..e1b80147788 100644 --- a/src/glsl/ir_uniform.h +++ b/src/glsl/ir_uniform.h @@ -181,6 +181,11 @@ struct gl_uniform_storage { * via the API. */ bool hidden; + + /** + * This is a built-in uniform that should not be modified through any gl API.
+ */ + bool builtin; }; #ifdef __cplusplus diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp index 69073841ea4..204acfa22b2 100644 --- a/src/glsl/link_uniform_initializers.cpp +++ b/src/glsl/link_uniform_initializers.cpp @@ -103,7 +103,7 @@ void set_sampler_binding(gl_shader_program *prog, const char *name, int binding) { struct gl_uniform_storage *const storage = - get_storage(prog->UniformStorage, prog->NumUserUniformStorage, name); + get_storage(prog->UniformStorage, prog->NumUniformStorage, name); if (storage == NULL) { assert(storage != NULL); @@ -193,7 +193,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, struct gl_uniform_storage *const storage = get_storage(prog->UniformStorage, - prog->NumUserUniformStorage, + prog->NumUniformStorage, name); if (storage == NULL) { assert(storage != NULL); diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index 2c928e1445b..11ae06f9bfb 100644 --- a/src/glsl/link_uniforms.cpp +++ b/src/glsl/link_uniforms.cpp @@ -589,12 +589,13 @@ private: handle_samplers(base_type, &this->uniforms[id]); handle_images(base_type, &this->uniforms[id]); - /* If there is already storage associated with this uniform, it means - * that it was set while processing an earlier shader stage. For - * example, we may be processing the uniform in the fragment shader, but - * the uniform was already processed in the vertex shader. + /* If there is already storage associated with this uniform or if the + * uniform is set as builtin, it means that it was set while processing + * an earlier shader stage. For example, we may be processing the + * uniform in the fragment shader, but the uniform was already processed + * in the vertex shader. 
*/ - if (this->uniforms[id].storage != NULL) { + if (this->uniforms[id].storage != NULL || this->uniforms[id].builtin) { return; } @@ -619,10 +620,15 @@ private: this->uniforms[id].initialized = 0; this->uniforms[id].num_driver_storage = 0; this->uniforms[id].driver_storage = NULL; - this->uniforms[id].storage = this->values; this->uniforms[id].atomic_buffer_index = -1; this->uniforms[id].hidden = current_var->data.how_declared == ir_var_hidden; + this->uniforms[id].builtin = is_gl_identifier(name); + + /* Do not assign storage if the uniform is builtin */ + if (!this->uniforms[id].builtin) + this->uniforms[id].storage = this->values; + if (this->ubo_block_index != -1) { this->uniforms[id].block_index = this->ubo_block_index; @@ -894,7 +900,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog, { ralloc_free(prog->UniformStorage); prog->UniformStorage = NULL; - prog->NumUserUniformStorage = 0; + prog->NumUniformStorage = 0; if (prog->UniformHash != NULL) { prog->UniformHash->clear(); @@ -940,14 +946,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog, if ((var == NULL) || (var->data.mode != ir_var_uniform)) continue; - /* FINISHME: Update code to process built-in uniforms! - */ - if (is_gl_identifier(var->name)) { - uniform_size.num_shader_uniform_components += - var->type->component_slots(); - continue; - } - uniform_size.process(var); } @@ -962,16 +960,16 @@ link_assign_uniform_locations(struct gl_shader_program *prog, } } - const unsigned num_user_uniforms = uniform_size.num_active_uniforms; + const unsigned num_uniforms = uniform_size.num_active_uniforms; const unsigned num_data_slots = uniform_size.num_values; /* On the outside chance that there were no uniforms, bail out. 
*/ - if (num_user_uniforms == 0) + if (num_uniforms == 0) return; struct gl_uniform_storage *uniforms = - rzalloc_array(prog, struct gl_uniform_storage, num_user_uniforms); + rzalloc_array(prog, struct gl_uniform_storage, num_uniforms); union gl_constant_value *data = rzalloc_array(uniforms, union gl_constant_value, num_data_slots); #ifndef NDEBUG @@ -992,11 +990,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog, if ((var == NULL) || (var->data.mode != ir_var_uniform)) continue; - /* FINISHME: Update code to process built-in uniforms! - */ - if (is_gl_identifier(var->name)) - continue; - parcel.set_and_process(prog, var); } @@ -1009,10 +1002,10 @@ link_assign_uniform_locations(struct gl_shader_program *prog, } const unsigned hidden_uniforms = - move_hidden_uniforms_to_end(prog, uniforms, num_user_uniforms); + move_hidden_uniforms_to_end(prog, uniforms, num_uniforms); /* Reserve all the explicit locations of the active uniforms. */ - for (unsigned i = 0; i < num_user_uniforms; i++) { + for (unsigned i = 0; i < num_uniforms; i++) { if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) { /* How many new entries for this uniform? */ const unsigned entries = MAX2(1, uniforms[i].array_elements); @@ -1028,7 +1021,11 @@ link_assign_uniform_locations(struct gl_shader_program *prog, } /* Reserve locations for rest of the uniforms. */ - for (unsigned i = 0; i < num_user_uniforms; i++) { + for (unsigned i = 0; i < num_uniforms; i++) { + + /* Built-in uniforms should not get any location. */ + if (uniforms[i].builtin) + continue; /* Explicit ones have been set already. 
*/ if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) @@ -1055,14 +1052,14 @@ link_assign_uniform_locations(struct gl_shader_program *prog, } #ifndef NDEBUG - for (unsigned i = 0; i < num_user_uniforms; i++) { - assert(uniforms[i].storage != NULL); + for (unsigned i = 0; i < num_uniforms; i++) { + assert(uniforms[i].storage != NULL || uniforms[i].builtin); } assert(parcel.values == data_end); #endif - prog->NumUserUniformStorage = num_user_uniforms; + prog->NumUniformStorage = num_uniforms; prog->NumHiddenUniforms = hidden_uniforms; prog->UniformStorage = uniforms; diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 99e0a388bb4..99783800b7f 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1400,8 +1400,8 @@ link_fs_input_layout_qualifiers(struct gl_shader_program *prog, "layout qualifiers for gl_FragCoord\n"); } - /* Update the linked shader state.  Note that uses_gl_fragcoord should - * accumulate the results.  The other values should replace.  If there + /* Update the linked shader state. Note that uses_gl_fragcoord should + * accumulate the results. The other values should replace. If there * are multiple redeclarations, all the fields except uses_gl_fragcoord * are already known to be the same. */ @@ -2693,7 +2693,7 @@ build_program_resource_list(struct gl_context *ctx, } /* Add uniforms from uniform storage. */ - for (unsigned i = 0; i < shProg->NumUserUniformStorage; i++) { + for (unsigned i = 0; i < shProg->NumUniformStorage; i++) { /* Do not add uniforms internally used by Mesa. 
*/ if (shProg->UniformStorage[i].hidden) continue; diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp index a109c4e92d2..00db61e409b 100644 --- a/src/glsl/standalone_scaffolding.cpp +++ b/src/glsl/standalone_scaffolding.cpp @@ -89,7 +89,7 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg) { unsigned i; - shProg->NumUserUniformStorage = 0; + shProg->NumUniformStorage = 0; shProg->UniformStorage = NULL; shProg->NumUniformRemapTable = 0; shProg->UniformRemapTable = NULL; diff --git a/src/glsl/tests/set_uniform_initializer_tests.cpp b/src/glsl/tests/set_uniform_initializer_tests.cpp index d3fdeb3a844..91227d9487a 100644 --- a/src/glsl/tests/set_uniform_initializer_tests.cpp +++ b/src/glsl/tests/set_uniform_initializer_tests.cpp @@ -110,7 +110,7 @@ establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage, prog->UniformStorage = rzalloc_array(prog, struct gl_uniform_storage, num_storage); - prog->NumUserUniformStorage = num_storage; + prog->NumUniformStorage = num_storage; prog->UniformStorage[index_to_set].name = (char *) name; prog->UniformStorage[index_to_set].type = type; @@ -155,7 +155,7 @@ establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage, static void verify_initialization(struct gl_shader_program *prog, unsigned actual_index) { - for (unsigned i = 0; i < prog->NumUserUniformStorage; i++) { + for (unsigned i = 0; i < prog->NumUniformStorage; i++) { if (i == actual_index) { EXPECT_TRUE(prog->UniformStorage[actual_index].initialized); } else { diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 56a2278a2dc..5d3501c60ba 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -218,9 +218,12 @@ fs_visitor::nir_setup_uniform(nir_variable *var) * our name. 
*/ unsigned index = var->data.driver_location; - for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) { + for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; + if (storage->builtin) + continue; + if (strncmp(var->name, storage->name, namelen) != 0 || (storage->name[namelen] != 0 && storage->name[namelen] != '.' && diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index d3347ecce38..242d007b355 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -683,9 +683,12 @@ vec4_visitor::setup_uniform_values(ir_variable *ir) * order we'd walk the type, so walk the list of storage and find anything * with our name, or the prefix of a component that starts with our name. */ - for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) { + for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; + if (storage->builtin) + continue; + if (strncmp(ir->name, storage->name, namelen) != 0 || (storage->name[namelen] != 0 && storage->name[namelen] != '.' 
&& diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 737f0be6d62..0aa607653d1 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2728,7 +2728,7 @@ struct gl_shader_program } Comp; /* post-link info: */ - unsigned NumUserUniformStorage; + unsigned NumUniformStorage; unsigned NumHiddenUniforms; struct gl_uniform_storage *UniformStorage; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index a04b28711f7..6d8e6e23e9c 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -569,13 +569,13 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, *params = _mesa_longest_attribute_name_length(shProg); return; case GL_ACTIVE_UNIFORMS: - *params = shProg->NumUserUniformStorage - shProg->NumHiddenUniforms; + *params = shProg->NumUniformStorage - shProg->NumHiddenUniforms; return; case GL_ACTIVE_UNIFORM_MAX_LENGTH: { unsigned i; GLint max_len = 0; const unsigned num_uniforms = - shProg->NumUserUniformStorage - shProg->NumHiddenUniforms; + shProg->NumUniformStorage - shProg->NumHiddenUniforms; for (i = 0; i < num_uniforms; i++) { /* Add one for the terminating NUL character for a non-array, and diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index e428960362d..110a18e1e2c 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -282,10 +282,10 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg) unsigned i; if (shProg->UniformStorage) { - for (i = 0; i < shProg->NumUserUniformStorage; ++i) + for (i = 0; i < shProg->NumUniformStorage; ++i) _mesa_uniform_detach_all_driver_storage(&shProg->UniformStorage[i]); ralloc_free(shProg->UniformStorage); - shProg->NumUserUniformStorage = 0; + shProg->NumUniformStorage = 0; shProg->UniformStorage = NULL; } diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index 728bd1bac10..cab5083e81b 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -237,6 
+237,13 @@ validate_uniform_parameters(struct gl_context *ctx, struct gl_uniform_storage *const uni = shProg->UniformRemapTable[location]; + /* Even though no location is assigned to a built-in uniform and this + * function should already have returned NULL, this test makes it explicit + * that we are not allowing to update the value of a built-in. + */ + if (uni->builtin) + return NULL; + if (uni->array_elements == 0) { if (count > 1) { _mesa_error(ctx, GL_INVALID_OPERATION, @@ -1028,6 +1035,10 @@ _mesa_get_uniform_location(struct gl_shader_program *shProg, if (!found) return GL_INVALID_INDEX; + /* If the uniform is built-in, fail. */ + if (shProg->UniformStorage[location].builtin) + return GL_INVALID_INDEX; + /* If the uniform is an array, fail if the index is out of bounds. * (A negative index is caught above.) This also fails if the uniform * is not an array, but the user is trying to index it, because @@ -1047,7 +1058,7 @@ _mesa_sampler_uniforms_are_valid(const struct gl_shader_program *shProg, char *errMsg, size_t errMsgLength) { /* Shader does not have samplers. 
*/ - if (shProg->NumUserUniformStorage == 0) + if (shProg->NumUniformStorage == 0) return true; if (!shProg->SamplersValidated) { @@ -1087,7 +1098,7 @@ _mesa_sampler_uniforms_pipeline_are_valid(struct gl_pipeline_object *pipeline) if (!shProg[idx]) continue; - for (unsigned i = 0; i < shProg[idx]->NumUserUniformStorage; i++) { + for (unsigned i = 0; i < shProg[idx]->NumUniformStorage; i++) { const struct gl_uniform_storage *const storage = &shProg[idx]->UniformStorage[i]; const glsl_type *const t = (storage->type->is_array()) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 37597247904..50b86ada37a 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2406,9 +2406,14 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, if (!found) continue; + struct gl_uniform_storage *storage = + &shader_program->UniformStorage[location]; + + /* Do not associate any uniform storage to built-in uniforms */ + if (!storage->builtin) + continue; + if (location != last_location) { - struct gl_uniform_storage *storage = - &shader_program->UniformStorage[location]; enum gl_uniform_driver_format format = uniform_native; unsigned columns = 0; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 488f6ead201..8b43582c14b 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -141,7 +141,7 @@ check_uniforms(struct gl_context *ctx) if (shProg[j] == NULL || !shProg[j]->LinkStatus) continue; - for (i = 0; i < shProg[j]->NumUserUniformStorage; i++) { + for (i = 0; i < shProg[j]->NumUniformStorage; i++) { const struct gl_uniform_storage *u = &shProg[j]->UniformStorage[i]; if (!u->initialized) { _mesa_warning(ctx, From 71e94578779e4344066d434004fd85ca493de552 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Thu, 4 Jun 2015 14:09:31 +0300 Subject: [PATCH 409/834] main: fix a regression in uniform handling introduced by 87a4bc5 MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comment was accurate but the condition was reversed... Reviewed-by: Jose Fonseca Reviewed-by: Tapani Pälli Signed-off-by: Martin Peres --- src/mesa/program/ir_to_mesa.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 50b86ada37a..514bb930e76 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2410,7 +2410,7 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, &shader_program->UniformStorage[location]; /* Do not associate any uniform storage to built-in uniforms */ - if (!storage->builtin) + if (storage->builtin) continue; if (location != last_location) { From 15a12795c6f3edef0e1cbab39b6da3d5b8f64fc3 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 4 Jun 2015 01:24:07 -0700 Subject: [PATCH 410/834] prog_to_nir: Make RSQ properly take the absolute value of its argument. I just botched this when writing the original code. From the ARB_vertex_program specification: "The RSQ instruction approximates the reciprocal of the square root of the absolute value of the scalar operand and replicates it to all four components of the result vector." 
Fixes a Glean vertProg1 subtest: RSQ test 2 (reciprocal square root of negative value) Cc: mesa-stable@lists.freedesktop.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90547 Signed-off-by: Kenneth Graunke Reviewed-by: Francisco Jerez --- src/mesa/program/prog_to_nir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index e986e511514..d3e3f15c959 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -764,7 +764,8 @@ ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst) switch (op) { case OPCODE_RSQ: - ptn_move_dest(b, dest, nir_frsq(b, ptn_channel(b, src[0], X))); + ptn_move_dest(b, dest, + nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X)))); break; case OPCODE_RCP: From c820407ef0aac87546d1a778e169cfa1a915a219 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 2 Jun 2015 20:40:54 -0700 Subject: [PATCH 411/834] i965/fs: Print mlen in dump_instructions() output. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 28a19bd14ec..902f239e3df 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3650,6 +3650,9 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) } fprintf(file, "(%d) ", inst->exec_size); + if (inst->mlen) { + fprintf(file, "(mlen: %d) ", inst->mlen); + } switch (inst->dst.file) { case GRF: From 6e5970ffee0129fb94d8b7f0ebd4fac3992e7dce Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Thu, 4 Jun 2015 14:35:59 +0200 Subject: [PATCH 412/834] draw: (trivial) fix NULL pointer dereference This probably got broken when the samplers were converted to be indexed by shader type. 
Seen when looking at bug 89819 though I'm not sure if that really was what the bug was about... Cc: "10.5 10.6" Reviewed-by: Brian Paul --- src/gallium/auxiliary/draw/draw_llvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 885c27c13c2..9629a8a3e42 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1969,7 +1969,7 @@ draw_llvm_set_sampler_state(struct draw_context *draw, for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) { struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i]; - if (draw->samplers[i]) { + if (draw->samplers[PIPE_SHADER_VERTEX][i]) { const struct pipe_sampler_state *s = draw->samplers[PIPE_SHADER_VERTEX][i]; jit_sam->min_lod = s->min_lod; @@ -1982,7 +1982,7 @@ draw_llvm_set_sampler_state(struct draw_context *draw, for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) { struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i]; - if (draw->samplers[i]) { + if (draw->samplers[PIPE_SHADER_GEOMETRY][i]) { const struct pipe_sampler_state *s = draw->samplers[PIPE_SHADER_GEOMETRY][i]; jit_sam->min_lod = s->min_lod; From 00d8733120276fc5bdd3ecb7fea6e04b7940d71b Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 5 Jun 2015 02:25:03 +0200 Subject: [PATCH 413/834] docs: add note about llvmpipe supporting GL_ARB_shader_stencil_export --- docs/relnotes/10.7.0.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html index 6206716e58e..7518389ba4b 100644 --- a/docs/relnotes/10.7.0.html +++ b/docs/relnotes/10.7.0.html @@ -43,7 +43,9 @@ TBD. Note: some of the new features are only available with certain drivers.

    -TBD. +
      +
    • GL_ARB_shader_stencil_export on llvmpipe
    • +

    Bug fixes

    From cb277cde6f2a210b0515cd04269964fd409307e9 Mon Sep 17 00:00:00 2001 From: Alan Coopersmith Date: Sat, 23 May 2015 00:03:53 -0700 Subject: [PATCH 414/834] glsl_compiler: Remove unused extra argument to printf in usage_fail Flagged by Oracle's parfait static analyzer: Error: Format string argument mismatch (CWE 628) In call to printf with format string "usage: %s [options] \n\nPossible options are:\n" Too many arguments for format string (got more than 1 arguments) at line 285 of src/glsl/main.cpp in function 'usage_fail'. Signed-off-by: Alan Coopersmith Reviewed-by: Matt Turner --- src/glsl/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index ccac8399646..4b39c9e54c2 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -282,7 +282,7 @@ usage_fail(const char *name) "usage: %s [options] \n" "\n" "Possible options are:\n"; - printf(header, name, name); + printf(header, name); for (const struct option *o = compiler_opts; o->name != 0; ++o) { printf(" --%s\n", o->name); } From 78395dbf9ff429d98523f8b4a340f7188d8b4db0 Mon Sep 17 00:00:00 2001 From: Tapani Date: Fri, 5 Jun 2015 08:22:07 +0300 Subject: [PATCH 415/834] mesa: fix program resource queries for builtin variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch fixes special cases with gl_VertexID and sets all builtin variables locations as '-1' as specified by the extension spec. 
Fixes ES 3.1 conformance test failure: ES31-CTS.program_interface_query.input-built-in v2: comments + use is_gl_identifier() (Martin) Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres --- src/mesa/main/shader_query.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 3445f89a356..a6246a39aad 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -479,12 +479,20 @@ _mesa_GetFragDataLocation(GLuint program, const GLchar *name) const char* _mesa_program_resource_name(struct gl_program_resource *res) { + const ir_variable *var; switch (res->Type) { case GL_UNIFORM_BLOCK: return RESOURCE_UBO(res)->Name; case GL_TRANSFORM_FEEDBACK_VARYING: return RESOURCE_XFB(res)->Name; case GL_PROGRAM_INPUT: + var = RESOURCE_VAR(res); + /* Special case gl_VertexIDMESA -> gl_VertexID. */ + if (var->data.mode == ir_var_system_value && + var->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) { + return "gl_VertexID"; + } + /* fallthrough */ case GL_PROGRAM_OUTPUT: return RESOURCE_VAR(res)->name; case GL_UNIFORM: @@ -539,6 +547,17 @@ struct gl_program_resource * _mesa_program_resource_find_name(struct gl_shader_program *shProg, GLenum programInterface, const char *name) { + GET_CURRENT_CONTEXT(ctx); + const char *full_name = name; + + /* When context has 'VertexID_is_zero_based' set, gl_VertexID has been + * lowered to gl_VertexIDMESA. 
+ */ + if (name && ctx->Const.VertexID_is_zero_based) { + if (strcmp(name, "gl_VertexID") == 0) + full_name = "gl_VertexIDMESA"; + } + struct gl_program_resource *res = shProg->ProgramResourceList; for (unsigned i = 0; i < shProg->NumProgramResourceList; i++, res++) { if (res->Type != programInterface) @@ -563,7 +582,7 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg, break; case GL_PROGRAM_INPUT: case GL_PROGRAM_OUTPUT: - if (array_index_of_resource(res, name) >= 0) + if (array_index_of_resource(res, full_name) >= 0) return res; break; default: @@ -728,6 +747,10 @@ program_resource_location(struct gl_shader_program *shProg, return -1; } + /* Built-in locations should report GL_INVALID_INDEX. */ + if (is_gl_identifier(name)) + return GL_INVALID_INDEX; + /* VERT_ATTRIB_GENERIC0 and FRAG_RESULT_DATA0 are decremented as these * offsets are used internally to differentiate between built-in attributes * and user-defined attributes. From cf2c9265a3977d43beb9a9894a5b934af74df7d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 May 2015 17:41:26 +0200 Subject: [PATCH 416/834] tgsi/scan: get more information about arrays and handle arrays correctly (v2) v2: use less memory for the information --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 24 +++++++++++++++++++++--- src/gallium/auxiliary/tgsi/tgsi_scan.h | 4 ++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index d821072935a..369f56a1955 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -167,13 +167,31 @@ tgsi_scan_shader(const struct tgsi_token *tokens, = &parse.FullToken.FullDeclaration; const uint file = fulldecl->Declaration.File; uint reg; - if (fulldecl->Declaration.Array) - info->array_max[file] = MAX2(info->array_max[file], fulldecl->Array.ArrayID); + + if (fulldecl->Declaration.Array) { + unsigned array_id = 
fulldecl->Array.ArrayID; + + switch (file) { + case TGSI_FILE_INPUT: + assert(array_id < ARRAY_SIZE(info->input_array_first)); + info->input_array_first[array_id] = fulldecl->Range.First; + info->input_array_last[array_id] = fulldecl->Range.Last; + break; + case TGSI_FILE_OUTPUT: + assert(array_id < ARRAY_SIZE(info->output_array_first)); + info->output_array_first[array_id] = fulldecl->Range.First; + info->output_array_last[array_id] = fulldecl->Range.Last; + break; + } + info->array_max[file] = MAX2(info->array_max[file], array_id); + } + for (reg = fulldecl->Range.First; reg <= fulldecl->Range.Last; reg++) { unsigned semName = fulldecl->Semantic.Name; - unsigned semIndex = fulldecl->Semantic.Index; + unsigned semIndex = + fulldecl->Semantic.Index + (reg - fulldecl->Range.First); /* only first 32 regs will appear in this bitfield */ info->file_mask[file] |= (1 << reg); diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 0ea0e8846be..af4b128fcaf 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -65,6 +65,10 @@ struct tgsi_shader_info int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ int const_file_max[PIPE_MAX_CONSTANT_BUFFERS]; + ubyte input_array_first[PIPE_MAX_SHADER_INPUTS]; + ubyte input_array_last[PIPE_MAX_SHADER_INPUTS]; + ubyte output_array_first[PIPE_MAX_SHADER_OUTPUTS]; + ubyte output_array_last[PIPE_MAX_SHADER_OUTPUTS]; unsigned array_max[TGSI_FILE_COUNT]; /**< highest index array per register file */ uint immediate_count; /**< number of immediates declared */ From 918ca4031f670066f054cdebcfe68ad75c963ac6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 May 2015 12:52:02 +0200 Subject: [PATCH 417/834] tgsi/ureg: add support for FS input array declarations --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 46 +++++++++++++++---- src/gallium/auxiliary/tgsi/tgsi_ureg.h | 29 +++++++++--- 
src/gallium/state_trackers/nine/nine_shader.c | 2 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 4 files changed, 61 insertions(+), 18 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 037d31a1643..ca2589ac996 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -104,8 +104,11 @@ struct ureg_program unsigned interp; unsigned char cylindrical_wrap; unsigned interp_location; + unsigned first; + unsigned last; + unsigned array_id; } fs_input[UREG_MAX_INPUT]; - unsigned nr_fs_inputs; + unsigned nr_fs_inputs, nr_fs_input_regs; unsigned vs_inputs[UREG_MAX_INPUT/32]; @@ -254,30 +257,42 @@ ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, unsigned semantic_index, unsigned interp_mode, unsigned cylindrical_wrap, - unsigned interp_location) + unsigned interp_location, + unsigned array_id, + unsigned array_size) { unsigned i; for (i = 0; i < ureg->nr_fs_inputs; i++) { if (ureg->fs_input[i].semantic_name == semantic_name && ureg->fs_input[i].semantic_index == semantic_index) { + assert(ureg->fs_input[i].interp == interp_mode); + assert(ureg->fs_input[i].cylindrical_wrap == cylindrical_wrap); + assert(ureg->fs_input[i].interp_location == interp_location); + assert(ureg->fs_input[i].array_id == array_id); goto out; } } if (ureg->nr_fs_inputs < UREG_MAX_INPUT) { + assert(array_size >= 1); ureg->fs_input[i].semantic_name = semantic_name; ureg->fs_input[i].semantic_index = semantic_index; ureg->fs_input[i].interp = interp_mode; ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap; ureg->fs_input[i].interp_location = interp_location; + ureg->fs_input[i].first = ureg->nr_fs_input_regs; + ureg->fs_input[i].last = ureg->nr_fs_input_regs + array_size - 1; + ureg->fs_input[i].array_id = array_id; + ureg->nr_fs_input_regs += array_size; ureg->nr_fs_inputs++; } else { set_bad(ureg); } out: - return ureg_src_register(TGSI_FILE_INPUT, i); + return 
ureg_src_array_register(TGSI_FILE_INPUT, ureg->fs_input[i].first, + array_id); } @@ -1281,14 +1296,17 @@ emit_decl_semantic(struct ureg_program *ureg, static void emit_decl_fs(struct ureg_program *ureg, unsigned file, - unsigned index, + unsigned first, + unsigned last, unsigned semantic_name, unsigned semantic_index, unsigned interpolate, unsigned cylindrical_wrap, - unsigned interpolate_location) + unsigned interpolate_location, + unsigned array_id) { - union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 4); + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, + array_id ? 5 : 4); out[0].value = 0; out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; @@ -1297,10 +1315,11 @@ emit_decl_fs(struct ureg_program *ureg, out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */ out[0].decl.Interpolate = 1; out[0].decl.Semantic = 1; + out[0].decl.Array = array_id != 0; out[1].value = 0; - out[1].decl_range.First = index; - out[1].decl_range.Last = index; + out[1].decl_range.First = first; + out[1].decl_range.Last = last; out[2].value = 0; out[2].decl_interp.Interpolate = interpolate; @@ -1310,6 +1329,11 @@ emit_decl_fs(struct ureg_program *ureg, out[3].value = 0; out[3].decl_semantic.Name = semantic_name; out[3].decl_semantic.Index = semantic_index; + + if (array_id) { + out[4].value = 0; + out[4].array.ArrayID = array_id; + } } static void @@ -1464,12 +1488,14 @@ static void emit_decls( struct ureg_program *ureg ) for (i = 0; i < ureg->nr_fs_inputs; i++) { emit_decl_fs(ureg, TGSI_FILE_INPUT, - i, + ureg->fs_input[i].first, + ureg->fs_input[i].last, ureg->fs_input[i].semantic_name, ureg->fs_input[i].semantic_index, ureg->fs_input[i].interp, ureg->fs_input[i].cylindrical_wrap, - ureg->fs_input[i].interp_location); + ureg->fs_input[i].interp_location, + ureg->fs_input[i].array_id); } } else { for (i = 0; i < ureg->nr_gs_inputs; i++) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index c3f4012f2b9..37846d5be8d 100644 
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -172,7 +172,9 @@ ureg_DECL_fs_input_cyl_centroid(struct ureg_program *, unsigned semantic_index, unsigned interp_mode, unsigned cylindrical_wrap, - unsigned interp_location); + unsigned interp_location, + unsigned array_id, + unsigned array_size); static INLINE struct ureg_src ureg_DECL_fs_input_cyl(struct ureg_program *ureg, @@ -186,7 +188,7 @@ ureg_DECL_fs_input_cyl(struct ureg_program *ureg, semantic_index, interp_mode, cylindrical_wrap, - 0); + 0, 0, 1); } static INLINE struct ureg_src @@ -199,7 +201,7 @@ ureg_DECL_fs_input(struct ureg_program *ureg, semantic_name, semantic_index, interp_mode, - 0, 0); + 0, 0, 0, 1); } struct ureg_src @@ -1162,6 +1164,13 @@ ureg_src_dimension_indirect( struct ureg_src reg, struct ureg_src addr, return reg; } +static INLINE struct ureg_src +ureg_src_array_offset(struct ureg_src reg, int offset) +{ + reg.Index += offset; + return reg; +} + static INLINE struct ureg_dst ureg_dst_array_offset( struct ureg_dst reg, int offset ) { @@ -1236,8 +1245,9 @@ ureg_dst( struct ureg_src src ) } static INLINE struct ureg_src -ureg_src_register(unsigned file, - unsigned index) +ureg_src_array_register(unsigned file, + unsigned index, + unsigned array_id) { struct ureg_src src; @@ -1259,11 +1269,18 @@ ureg_src_register(unsigned file, src.DimIndFile = TGSI_FILE_NULL; src.DimIndIndex = 0; src.DimIndSwizzle = 0; - src.ArrayID = 0; + src.ArrayID = array_id; return src; } +static INLINE struct ureg_src +ureg_src_register(unsigned file, + unsigned index) +{ + return ureg_src_array_register(file, index, 0); +} + static INLINE struct ureg_src ureg_src( struct ureg_dst dst ) { diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index fd0f76e1118..cdd4918530b 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -1979,7 +1979,7 @@ DECL_SPECIAL(DCL) 
ureg, tgsi.Name, tgsi.Index, nine_tgsi_to_interp_mode(&tgsi), 0, /* cylwrap */ - sem.reg.mod & NINED3DSPDM_CENTROID); + sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1); } else if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */ /* FragColor or FragDepth */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index db190c943a4..72b47b7421b 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5149,7 +5149,7 @@ st_translate_program( inputSemanticName[i], inputSemanticIndex[i], interpMode[i], 0, - interpLocation[i]); + interpLocation[i], 0, 1); } if (proginfo->InputsRead & VARYING_BIT_POS) { From 3b1d15775190945b1a639dd9b2581b4032cd2ac6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 May 2015 13:10:03 +0200 Subject: [PATCH 418/834] tgsi/ureg: rename and simplify ureg_DECL_gs_input There is nothing special about it and it's used for tessellation shaders too. 
--- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 31 +++++++++++----------- src/gallium/auxiliary/tgsi/tgsi_ureg.h | 7 +++-- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 +++-- src/mesa/state_tracker/st_mesa_to_tgsi.c | 7 +++-- 4 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index ca2589ac996..a8f0867cf2f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -116,8 +116,8 @@ struct ureg_program unsigned index; unsigned semantic_name; unsigned semantic_index; - } gs_input[UREG_MAX_INPUT]; - unsigned nr_gs_inputs; + } input[UREG_MAX_INPUT]; + unsigned nr_inputs; struct { unsigned index; @@ -308,22 +308,23 @@ ureg_DECL_vs_input( struct ureg_program *ureg, struct ureg_src -ureg_DECL_gs_input(struct ureg_program *ureg, - unsigned index, +ureg_DECL_input(struct ureg_program *ureg, unsigned semantic_name, unsigned semantic_index) { - if (ureg->nr_gs_inputs < UREG_MAX_INPUT) { - ureg->gs_input[ureg->nr_gs_inputs].index = index; - ureg->gs_input[ureg->nr_gs_inputs].semantic_name = semantic_name; - ureg->gs_input[ureg->nr_gs_inputs].semantic_index = semantic_index; - ureg->nr_gs_inputs++; + int i = 0; + + if (ureg->nr_inputs < UREG_MAX_INPUT) { + i = ureg->nr_inputs; + ureg->input[i].index = i; + ureg->input[i].semantic_name = semantic_name; + ureg->input[i].semantic_index = semantic_index; + ureg->nr_inputs++; } else { set_bad(ureg); } - /* XXX: Add suport for true 2D input registers. 
*/ - return ureg_src_register(TGSI_FILE_INPUT, index); + return ureg_src_register(TGSI_FILE_INPUT, i); } @@ -1498,12 +1499,12 @@ static void emit_decls( struct ureg_program *ureg ) ureg->fs_input[i].array_id); } } else { - for (i = 0; i < ureg->nr_gs_inputs; i++) { + for (i = 0; i < ureg->nr_inputs; i++) { emit_decl_semantic(ureg, TGSI_FILE_INPUT, - ureg->gs_input[i].index, - ureg->gs_input[i].semantic_name, - ureg->gs_input[i].semantic_index, + ureg->input[i].index, + ureg->input[i].semantic_name, + ureg->input[i].semantic_index, TGSI_WRITEMASK_XYZW); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 37846d5be8d..7ad4c93981c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -209,10 +209,9 @@ ureg_DECL_vs_input( struct ureg_program *, unsigned index ); struct ureg_src -ureg_DECL_gs_input(struct ureg_program *, - unsigned index, - unsigned semantic_name, - unsigned semantic_index); +ureg_DECL_input(struct ureg_program *, + unsigned semantic_name, + unsigned semantic_index); struct ureg_src ureg_DECL_system_value(struct ureg_program *, diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 72b47b7421b..c1810b69247 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5201,10 +5201,9 @@ st_translate_program( } else if (procType == TGSI_PROCESSOR_GEOMETRY) { for (i = 0; i < numInputs; i++) { - t->inputs[i] = ureg_DECL_gs_input(ureg, - i, - inputSemanticName[i], - inputSemanticIndex[i]); + t->inputs[i] = ureg_DECL_input(ureg, + inputSemanticName[i], + inputSemanticIndex[i]); } for (i = 0; i < numOutputs; i++) { diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index a88d7a87ff4..8efbc6fd69e 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -1095,10 +1095,9 @@ 
st_translate_mesa_program( } else if (procType == TGSI_PROCESSOR_GEOMETRY) { for (i = 0; i < numInputs; i++) { - t->inputs[i] = ureg_DECL_gs_input(ureg, - i, - inputSemanticName[i], - inputSemanticIndex[i]); + t->inputs[i] = ureg_DECL_input(ureg, + inputSemanticName[i], + inputSemanticIndex[i]); } for (i = 0; i < numOutputs; i++) { From d3fbc659868ecdbfe14600a75eafe28174f7d99e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 May 2015 16:51:32 +0200 Subject: [PATCH 419/834] tgsi/ureg: merge input and fs_input arrays --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 84 ++++++++++---------------- 1 file changed, 33 insertions(+), 51 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index a8f0867cf2f..ad13f64c586 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -107,18 +107,11 @@ struct ureg_program unsigned first; unsigned last; unsigned array_id; - } fs_input[UREG_MAX_INPUT]; - unsigned nr_fs_inputs, nr_fs_input_regs; + } input[UREG_MAX_INPUT]; + unsigned nr_inputs, nr_input_regs; unsigned vs_inputs[UREG_MAX_INPUT/32]; - struct { - unsigned index; - unsigned semantic_name; - unsigned semantic_index; - } input[UREG_MAX_INPUT]; - unsigned nr_inputs; - struct { unsigned index; unsigned semantic_name; @@ -263,35 +256,35 @@ ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, { unsigned i; - for (i = 0; i < ureg->nr_fs_inputs; i++) { - if (ureg->fs_input[i].semantic_name == semantic_name && - ureg->fs_input[i].semantic_index == semantic_index) { - assert(ureg->fs_input[i].interp == interp_mode); - assert(ureg->fs_input[i].cylindrical_wrap == cylindrical_wrap); - assert(ureg->fs_input[i].interp_location == interp_location); - assert(ureg->fs_input[i].array_id == array_id); + for (i = 0; i < ureg->nr_inputs; i++) { + if (ureg->input[i].semantic_name == semantic_name && + ureg->input[i].semantic_index == semantic_index) { + 
assert(ureg->input[i].interp == interp_mode); + assert(ureg->input[i].cylindrical_wrap == cylindrical_wrap); + assert(ureg->input[i].interp_location == interp_location); + assert(ureg->input[i].array_id == array_id); goto out; } } - if (ureg->nr_fs_inputs < UREG_MAX_INPUT) { + if (ureg->nr_inputs < UREG_MAX_INPUT) { assert(array_size >= 1); - ureg->fs_input[i].semantic_name = semantic_name; - ureg->fs_input[i].semantic_index = semantic_index; - ureg->fs_input[i].interp = interp_mode; - ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap; - ureg->fs_input[i].interp_location = interp_location; - ureg->fs_input[i].first = ureg->nr_fs_input_regs; - ureg->fs_input[i].last = ureg->nr_fs_input_regs + array_size - 1; - ureg->fs_input[i].array_id = array_id; - ureg->nr_fs_input_regs += array_size; - ureg->nr_fs_inputs++; + ureg->input[i].semantic_name = semantic_name; + ureg->input[i].semantic_index = semantic_index; + ureg->input[i].interp = interp_mode; + ureg->input[i].cylindrical_wrap = cylindrical_wrap; + ureg->input[i].interp_location = interp_location; + ureg->input[i].first = ureg->nr_input_regs; + ureg->input[i].last = ureg->nr_input_regs + array_size - 1; + ureg->input[i].array_id = array_id; + ureg->nr_input_regs += array_size; + ureg->nr_inputs++; } else { set_bad(ureg); } out: - return ureg_src_array_register(TGSI_FILE_INPUT, ureg->fs_input[i].first, + return ureg_src_array_register(TGSI_FILE_INPUT, ureg->input[i].first, array_id); } @@ -312,19 +305,8 @@ ureg_DECL_input(struct ureg_program *ureg, unsigned semantic_name, unsigned semantic_index) { - int i = 0; - - if (ureg->nr_inputs < UREG_MAX_INPUT) { - i = ureg->nr_inputs; - ureg->input[i].index = i; - ureg->input[i].semantic_name = semantic_name; - ureg->input[i].semantic_index = semantic_index; - ureg->nr_inputs++; - } else { - set_bad(ureg); - } - - return ureg_src_register(TGSI_FILE_INPUT, i); + return ureg_DECL_fs_input_cyl_centroid(ureg, semantic_name, semantic_index, + 0, 0, 0, 0, 1); } @@ -1486,23 
+1468,23 @@ static void emit_decls( struct ureg_program *ureg ) } } } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) { - for (i = 0; i < ureg->nr_fs_inputs; i++) { + for (i = 0; i < ureg->nr_inputs; i++) { emit_decl_fs(ureg, TGSI_FILE_INPUT, - ureg->fs_input[i].first, - ureg->fs_input[i].last, - ureg->fs_input[i].semantic_name, - ureg->fs_input[i].semantic_index, - ureg->fs_input[i].interp, - ureg->fs_input[i].cylindrical_wrap, - ureg->fs_input[i].interp_location, - ureg->fs_input[i].array_id); + ureg->input[i].first, + ureg->input[i].last, + ureg->input[i].semantic_name, + ureg->input[i].semantic_index, + ureg->input[i].interp, + ureg->input[i].cylindrical_wrap, + ureg->input[i].interp_location, + ureg->input[i].array_id); } } else { for (i = 0; i < ureg->nr_inputs; i++) { emit_decl_semantic(ureg, TGSI_FILE_INPUT, - ureg->input[i].index, + ureg->input[i].first, ureg->input[i].semantic_name, ureg->input[i].semantic_index, TGSI_WRITEMASK_XYZW); From 1fa6c99e24890359e9cee2a9da02f21ea77b9f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 May 2015 16:17:35 +0200 Subject: [PATCH 420/834] tgsi/ureg: add support for GS input array declarations --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 37 +++++++++++++++------- src/gallium/auxiliary/tgsi/tgsi_ureg.h | 4 ++- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 2 +- 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index ad13f64c586..b1aebfa1cb9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -302,11 +302,13 @@ ureg_DECL_vs_input( struct ureg_program *ureg, struct ureg_src ureg_DECL_input(struct ureg_program *ureg, - unsigned semantic_name, - unsigned semantic_index) + unsigned semantic_name, + unsigned semantic_index, + unsigned array_id, + unsigned array_size) { return 
ureg_DECL_fs_input_cyl_centroid(ureg, semantic_name, semantic_index, - 0, 0, 0, 0, 1); + 0, 0, 0, array_id, array_size); } @@ -1252,12 +1254,14 @@ ureg_label_insn(struct ureg_program *ureg, static void emit_decl_semantic(struct ureg_program *ureg, unsigned file, - unsigned index, + unsigned first, + unsigned last, unsigned semantic_name, unsigned semantic_index, - unsigned usage_mask) + unsigned usage_mask, + unsigned array_id) { - union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3); out[0].value = 0; out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; @@ -1265,14 +1269,20 @@ emit_decl_semantic(struct ureg_program *ureg, out[0].decl.File = file; out[0].decl.UsageMask = usage_mask; out[0].decl.Semantic = 1; + out[0].decl.Array = array_id != 0; out[1].value = 0; - out[1].decl_range.First = index; - out[1].decl_range.Last = index; + out[1].decl_range.First = first; + out[1].decl_range.Last = last; out[2].value = 0; out[2].decl_semantic.Name = semantic_name; out[2].decl_semantic.Index = semantic_index; + + if (array_id) { + out[3].value = 0; + out[3].array.ArrayID = array_id; + } } @@ -1485,9 +1495,11 @@ static void emit_decls( struct ureg_program *ureg ) emit_decl_semantic(ureg, TGSI_FILE_INPUT, ureg->input[i].first, + ureg->input[i].last, ureg->input[i].semantic_name, ureg->input[i].semantic_index, - TGSI_WRITEMASK_XYZW); + TGSI_WRITEMASK_XYZW, + ureg->input[i].array_id); } } @@ -1495,18 +1507,19 @@ static void emit_decls( struct ureg_program *ureg ) emit_decl_semantic(ureg, TGSI_FILE_SYSTEM_VALUE, ureg->system_value[i].index, + ureg->system_value[i].index, ureg->system_value[i].semantic_name, ureg->system_value[i].semantic_index, - TGSI_WRITEMASK_XYZW); + TGSI_WRITEMASK_XYZW, 0); } for (i = 0; i < ureg->nr_outputs; i++) { emit_decl_semantic(ureg, TGSI_FILE_OUTPUT, - i, + i, i, ureg->output[i].semantic_name, ureg->output[i].semantic_index, - ureg->output[i].usage_mask); + 
ureg->output[i].usage_mask, 0); } for (i = 0; i < ureg->nr_samplers; i++) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 7ad4c93981c..e6304697938 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -211,7 +211,9 @@ ureg_DECL_vs_input( struct ureg_program *, struct ureg_src ureg_DECL_input(struct ureg_program *, unsigned semantic_name, - unsigned semantic_index); + unsigned semantic_index, + unsigned array_id, + unsigned array_size); struct ureg_src ureg_DECL_system_value(struct ureg_program *, diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index c1810b69247..95968120730 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5203,7 +5203,7 @@ st_translate_program( for (i = 0; i < numInputs; i++) { t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i], - inputSemanticIndex[i]); + inputSemanticIndex[i], 0, 1); } for (i = 0; i < numOutputs; i++) { diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 8efbc6fd69e..896e239ee68 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -1097,7 +1097,7 @@ st_translate_mesa_program( for (i = 0; i < numInputs; i++) { t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i], - inputSemanticIndex[i]); + inputSemanticIndex[i], 0, 1); } for (i = 0; i < numOutputs; i++) { From a015b3952f568ad3da1ddfe42ff7ce6568f52780 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 20 May 2015 11:11:43 +0200 Subject: [PATCH 421/834] tgsi/ureg: add support for output array declarations --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 52 ++++++++++++++----- src/gallium/auxiliary/tgsi/tgsi_ureg.h | 38 ++++++++++---- src/gallium/state_trackers/nine/nine_ff.c | 6 ++- src/gallium/state_trackers/nine/nine_shader.c | 13 +++-- 
4 files changed, 78 insertions(+), 31 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index b1aebfa1cb9..76ffe9f79a2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -123,8 +123,11 @@ struct ureg_program unsigned semantic_name; unsigned semantic_index; unsigned usage_mask; /* = TGSI_WRITEMASK_* */ + unsigned first; + unsigned last; + unsigned array_id; } output[UREG_MAX_OUTPUT]; - unsigned nr_outputs; + unsigned nr_outputs, nr_output_regs; struct { union { @@ -332,10 +335,12 @@ ureg_DECL_system_value(struct ureg_program *ureg, struct ureg_dst -ureg_DECL_output_masked( struct ureg_program *ureg, - unsigned name, - unsigned index, - unsigned usage_mask ) +ureg_DECL_output_masked(struct ureg_program *ureg, + unsigned name, + unsigned index, + unsigned usage_mask, + unsigned array_id, + unsigned array_size) { unsigned i; @@ -343,7 +348,8 @@ ureg_DECL_output_masked( struct ureg_program *ureg, for (i = 0; i < ureg->nr_outputs; i++) { if (ureg->output[i].semantic_name == name && - ureg->output[i].semantic_index == index) { + ureg->output[i].semantic_index == index) { + assert(ureg->output[i].array_id == array_id); ureg->output[i].usage_mask |= usage_mask; goto out; } @@ -353,6 +359,10 @@ ureg_DECL_output_masked( struct ureg_program *ureg, ureg->output[i].semantic_name = name; ureg->output[i].semantic_index = index; ureg->output[i].usage_mask = usage_mask; + ureg->output[i].first = ureg->nr_output_regs; + ureg->output[i].last = ureg->nr_output_regs + array_size - 1; + ureg->output[i].array_id = array_id; + ureg->nr_output_regs += array_size; ureg->nr_outputs++; } else { @@ -360,16 +370,30 @@ ureg_DECL_output_masked( struct ureg_program *ureg, } out: - return ureg_dst_register( TGSI_FILE_OUTPUT, i ); + return ureg_dst_array_register(TGSI_FILE_OUTPUT, ureg->output[i].first, + array_id); } struct ureg_dst -ureg_DECL_output( struct ureg_program *ureg, - unsigned name, - 
unsigned index ) +ureg_DECL_output(struct ureg_program *ureg, + unsigned name, + unsigned index) { - return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW); + return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW, + 0, 1); +} + +struct ureg_dst +ureg_DECL_output_array(struct ureg_program *ureg, + unsigned semantic_name, + unsigned semantic_index, + unsigned array_id, + unsigned array_size) +{ + return ureg_DECL_output_masked(ureg, semantic_name, semantic_index, + TGSI_WRITEMASK_XYZW, + array_id, array_size); } @@ -1516,10 +1540,12 @@ static void emit_decls( struct ureg_program *ureg ) for (i = 0; i < ureg->nr_outputs; i++) { emit_decl_semantic(ureg, TGSI_FILE_OUTPUT, - i, i, + ureg->output[i].first, + ureg->output[i].last, ureg->output[i].semantic_name, ureg->output[i].semantic_index, - ureg->output[i].usage_mask, 0); + ureg->output[i].usage_mask, + ureg->output[i].array_id); } for (i = 0; i < ureg->nr_samplers; i++) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index e6304697938..e20f96d5674 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -222,15 +222,24 @@ ureg_DECL_system_value(struct ureg_program *, unsigned semantic_index); struct ureg_dst -ureg_DECL_output_masked( struct ureg_program *, - unsigned semantic_name, - unsigned semantic_index, - unsigned usage_mask ); +ureg_DECL_output_masked(struct ureg_program *, + unsigned semantic_name, + unsigned semantic_index, + unsigned usage_mask, + unsigned array_id, + unsigned array_size); struct ureg_dst -ureg_DECL_output( struct ureg_program *, - unsigned semantic_name, - unsigned semantic_index ); +ureg_DECL_output(struct ureg_program *, + unsigned semantic_name, + unsigned semantic_index); + +struct ureg_dst +ureg_DECL_output_array(struct ureg_program *ureg, + unsigned semantic_name, + unsigned semantic_index, + unsigned array_id, + unsigned array_size); struct ureg_src 
ureg_DECL_immediate( struct ureg_program *, @@ -1175,14 +1184,14 @@ ureg_src_array_offset(struct ureg_src reg, int offset) static INLINE struct ureg_dst ureg_dst_array_offset( struct ureg_dst reg, int offset ) { - assert(reg.File == TGSI_FILE_TEMPORARY); reg.Index += offset; return reg; } static INLINE struct ureg_dst -ureg_dst_register( unsigned file, - unsigned index ) +ureg_dst_array_register(unsigned file, + unsigned index, + unsigned array_id) { struct ureg_dst dst; @@ -1206,11 +1215,18 @@ ureg_dst_register( unsigned file, dst.DimIndFile = TGSI_FILE_NULL; dst.DimIndIndex = 0; dst.DimIndSwizzle = 0; - dst.ArrayID = 0; + dst.ArrayID = array_id; return dst; } +static INLINE struct ureg_dst +ureg_dst_register(unsigned file, + unsigned index) +{ + return ureg_dst_array_register(file, index, 0); +} + static INLINE struct ureg_dst ureg_dst( struct ureg_src src ) { diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c index e6f2b21dd4d..c2213e6bf11 100644 --- a/src/gallium/state_trackers/nine/nine_ff.c +++ b/src/gallium/state_trackers/nine/nine_ff.c @@ -422,13 +422,15 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1)); if (key->vertexpointsize || key->pointscale) { - oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0, TGSI_WRITEMASK_X); + oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0, + TGSI_WRITEMASK_X, 0, 1); oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X); } if (key->fog_mode) { /* We apply fog to the vertex colors, oFog is for programmable shaders only ? 
*/ - oFog = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_WRITEMASK_X); + oFog = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_FOG, 0, + TGSI_WRITEMASK_X, 0, 1); oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X); } diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index cdd4918530b..22a58825f78 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -1098,7 +1098,7 @@ _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) if (ureg_dst_is_undef(tx->regs.oDepth)) tx->regs.oDepth = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0, - TGSI_WRITEMASK_Z); + TGSI_WRITEMASK_Z, 0, 1); dst = tx->regs.oDepth; /* XXX: must write .z component */ break; case D3DSPR_PREDICATE: @@ -1966,7 +1966,7 @@ DECL_SPECIAL(DCL) tx->info->position_t = TRUE; assert(sem.reg.idx < Elements(tx->regs.o)); tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked( - ureg, tgsi.Name, tgsi.Index, sem.reg.mask); + ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1); if (tgsi.Name == TGSI_SEMANTIC_PSIZE) tx->regs.oPts = tx->regs.o[sem.reg.idx]; @@ -1984,7 +1984,8 @@ DECL_SPECIAL(DCL) if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */ /* FragColor or FragDepth */ assert(sem.reg.mask != 0); - ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask); + ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask, + 0, 1); } } return D3D_OK; @@ -2312,7 +2313,8 @@ DECL_SPECIAL(TEXM3x2DEPTH) ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f)); /* replace the depth for depth testing with the result */ - tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z); + tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, + TGSI_WRITEMASK_Z, 0, 1); 
ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); /* note that we write nothing to the destination, since it's disallowed to use it afterward */ return D3D_OK; @@ -2410,7 +2412,8 @@ DECL_SPECIAL(TEXDEPTH) ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)), r5r, ureg_imm1f(ureg, 1.0f)); /* replace the depth for depth testing with the result */ - tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z); + tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, + TGSI_WRITEMASK_Z, 0, 1); ureg_MOV(ureg, tx->regs.oDepth, r5r); return D3D_OK; From b6ebe7eabf54936a02acc0968e718e0c264a73f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 25 May 2015 19:30:44 +0200 Subject: [PATCH 422/834] tgsi/ureg: don't emit in/out arrays if drivers don't support ranged declarations Softpipe, llvmpipe, r300g, and radeonsi pass tests. Other drivers need testing. Freedreno and nv30 are definitely broken. Other drivers seem to be alright. 
--- src/gallium/auxiliary/gallivm/lp_bld_limits.h | 1 + src/gallium/auxiliary/tgsi/tgsi_exec.h | 1 + src/gallium/auxiliary/tgsi/tgsi_ureg.c | 172 ++++++++++++++---- src/gallium/auxiliary/tgsi/tgsi_ureg.h | 6 +- src/gallium/docs/source/screen.rst | 2 + .../drivers/freedreno/freedreno_screen.c | 1 + src/gallium/drivers/i915/i915_screen.c | 1 + .../drivers/nouveau/nv30/nv30_screen.c | 2 + .../drivers/nouveau/nv50/nv50_screen.c | 1 + .../drivers/nouveau/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/r300/r300_screen.c | 2 + src/gallium/drivers/r600/r600_pipe.c | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 1 + src/gallium/drivers/svga/svga_screen.c | 2 + src/gallium/drivers/vc4/vc4_screen.c | 1 + src/gallium/include/pipe/p_defines.h | 1 + src/mesa/state_tracker/st_program.c | 6 +- 17 files changed, 162 insertions(+), 40 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h index c5c51c18a0a..49064feddef 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -125,6 +125,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 0f4c966cc11..208640cfd46 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -458,6 +458,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return 1; case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c 
b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 76ffe9f79a2..1cea0919ce4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -26,6 +26,7 @@ **************************************************************************/ +#include "pipe/p_screen.h" #include "pipe/p_context.h" #include "pipe/p_state.h" #include "tgsi/tgsi_ureg.h" @@ -96,7 +97,7 @@ struct const_decl { struct ureg_program { unsigned processor; - struct pipe_context *pipe; + bool supports_any_inout_decl_range; struct { unsigned semantic_name; @@ -906,7 +907,11 @@ ureg_emit_src( struct ureg_program *ureg, out[n].ind.File = src.IndirectFile; out[n].ind.Swizzle = src.IndirectSwizzle; out[n].ind.Index = src.IndirectIndex; - out[n].ind.ArrayID = src.ArrayID; + if (!ureg->supports_any_inout_decl_range && + (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT)) + out[n].ind.ArrayID = 0; + else + out[n].ind.ArrayID = src.ArrayID; n++; } @@ -922,7 +927,11 @@ ureg_emit_src( struct ureg_program *ureg, out[n].ind.File = src.DimIndFile; out[n].ind.Swizzle = src.DimIndSwizzle; out[n].ind.Index = src.DimIndIndex; - out[n].ind.ArrayID = src.ArrayID; + if (!ureg->supports_any_inout_decl_range && + (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT)) + out[n].ind.ArrayID = 0; + else + out[n].ind.ArrayID = src.ArrayID; } else { out[n].dim.Indirect = 0; out[n].dim.Index = src.DimensionIndex; @@ -964,7 +973,11 @@ ureg_emit_dst( struct ureg_program *ureg, out[n].ind.File = dst.IndirectFile; out[n].ind.Swizzle = dst.IndirectSwizzle; out[n].ind.Index = dst.IndirectIndex; - out[n].ind.ArrayID = dst.ArrayID; + if (!ureg->supports_any_inout_decl_range && + (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT)) + out[n].ind.ArrayID = 0; + else + out[n].ind.ArrayID = dst.ArrayID; n++; } @@ -980,7 +993,11 @@ ureg_emit_dst( struct ureg_program *ureg, out[n].ind.File = dst.DimIndFile; out[n].ind.Swizzle = dst.DimIndSwizzle; out[n].ind.Index = dst.DimIndIndex; - 
out[n].ind.ArrayID = dst.ArrayID; + if (!ureg->supports_any_inout_decl_range && + (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT)) + out[n].ind.ArrayID = 0; + else + out[n].ind.ArrayID = dst.ArrayID; } else { out[n].dim.Indirect = 0; out[n].dim.Index = dst.DimensionIndex; @@ -1489,7 +1506,7 @@ emit_property(struct ureg_program *ureg, static void emit_decls( struct ureg_program *ureg ) { - unsigned i; + unsigned i,j; for (i = 0; i < Elements(ureg->properties); i++) if (ureg->properties[i] != ~0) @@ -1502,28 +1519,60 @@ static void emit_decls( struct ureg_program *ureg ) } } } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) { - for (i = 0; i < ureg->nr_inputs; i++) { - emit_decl_fs(ureg, - TGSI_FILE_INPUT, - ureg->input[i].first, - ureg->input[i].last, - ureg->input[i].semantic_name, - ureg->input[i].semantic_index, - ureg->input[i].interp, - ureg->input[i].cylindrical_wrap, - ureg->input[i].interp_location, - ureg->input[i].array_id); + if (ureg->supports_any_inout_decl_range) { + for (i = 0; i < ureg->nr_inputs; i++) { + emit_decl_fs(ureg, + TGSI_FILE_INPUT, + ureg->input[i].first, + ureg->input[i].last, + ureg->input[i].semantic_name, + ureg->input[i].semantic_index, + ureg->input[i].interp, + ureg->input[i].cylindrical_wrap, + ureg->input[i].interp_location, + ureg->input[i].array_id); + } + } + else { + for (i = 0; i < ureg->nr_inputs; i++) { + for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) { + emit_decl_fs(ureg, + TGSI_FILE_INPUT, + j, j, + ureg->input[i].semantic_name, + ureg->input[i].semantic_index + + (j - ureg->input[i].first), + ureg->input[i].interp, + ureg->input[i].cylindrical_wrap, + ureg->input[i].interp_location, 0); + } + } } } else { - for (i = 0; i < ureg->nr_inputs; i++) { - emit_decl_semantic(ureg, - TGSI_FILE_INPUT, - ureg->input[i].first, - ureg->input[i].last, - ureg->input[i].semantic_name, - ureg->input[i].semantic_index, - TGSI_WRITEMASK_XYZW, - ureg->input[i].array_id); + if 
(ureg->supports_any_inout_decl_range) { + for (i = 0; i < ureg->nr_inputs; i++) { + emit_decl_semantic(ureg, + TGSI_FILE_INPUT, + ureg->input[i].first, + ureg->input[i].last, + ureg->input[i].semantic_name, + ureg->input[i].semantic_index, + TGSI_WRITEMASK_XYZW, + ureg->input[i].array_id); + } + } + else { + for (i = 0; i < ureg->nr_inputs; i++) { + for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) { + emit_decl_semantic(ureg, + TGSI_FILE_INPUT, + j, j, + ureg->input[i].semantic_name, + ureg->input[i].semantic_index + + (j - ureg->input[i].first), + TGSI_WRITEMASK_XYZW, 0); + } + } } } @@ -1537,15 +1586,30 @@ static void emit_decls( struct ureg_program *ureg ) TGSI_WRITEMASK_XYZW, 0); } - for (i = 0; i < ureg->nr_outputs; i++) { - emit_decl_semantic(ureg, - TGSI_FILE_OUTPUT, - ureg->output[i].first, - ureg->output[i].last, - ureg->output[i].semantic_name, - ureg->output[i].semantic_index, - ureg->output[i].usage_mask, - ureg->output[i].array_id); + if (ureg->supports_any_inout_decl_range) { + for (i = 0; i < ureg->nr_outputs; i++) { + emit_decl_semantic(ureg, + TGSI_FILE_OUTPUT, + ureg->output[i].first, + ureg->output[i].last, + ureg->output[i].semantic_name, + ureg->output[i].semantic_index, + ureg->output[i].usage_mask, + ureg->output[i].array_id); + } + } + else { + for (i = 0; i < ureg->nr_outputs; i++) { + for (j = ureg->output[i].first; j <= ureg->output[i].last; j++) { + emit_decl_semantic(ureg, + TGSI_FILE_OUTPUT, + j, j, + ureg->output[i].semantic_name, + ureg->output[i].semantic_index + + (j - ureg->output[i].first), + ureg->output[i].usage_mask, 0); + } + } } for (i = 0; i < ureg->nr_samplers; i++) { @@ -1759,7 +1823,38 @@ void ureg_free_tokens( const struct tgsi_token *tokens ) } -struct ureg_program *ureg_create( unsigned processor ) +static INLINE unsigned +pipe_shader_from_tgsi_processor(unsigned processor) +{ + switch (processor) { + case TGSI_PROCESSOR_VERTEX: + return PIPE_SHADER_VERTEX; + case TGSI_PROCESSOR_TESS_CTRL: + return 
PIPE_SHADER_TESS_CTRL; + case TGSI_PROCESSOR_TESS_EVAL: + return PIPE_SHADER_TESS_EVAL; + case TGSI_PROCESSOR_GEOMETRY: + return PIPE_SHADER_GEOMETRY; + case TGSI_PROCESSOR_FRAGMENT: + return PIPE_SHADER_FRAGMENT; + case TGSI_PROCESSOR_COMPUTE: + return PIPE_SHADER_COMPUTE; + default: + assert(0); + return PIPE_SHADER_VERTEX; + } +} + + +struct ureg_program * +ureg_create(unsigned processor) +{ + return ureg_create_with_screen(processor, NULL); +} + + +struct ureg_program * +ureg_create_with_screen(unsigned processor, struct pipe_screen *screen) { int i; struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); @@ -1767,6 +1862,11 @@ struct ureg_program *ureg_create( unsigned processor ) goto no_ureg; ureg->processor = processor; + ureg->supports_any_inout_decl_range = + screen && + screen->get_shader_param(screen, + pipe_shader_from_tgsi_processor(processor), + PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0; for (i = 0; i < Elements(ureg->properties); i++) ureg->properties[i] = ~0; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index e20f96d5674..1891b068774 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -36,6 +36,7 @@ extern "C" { #endif +struct pipe_screen; struct ureg_program; struct pipe_stream_output_info; @@ -98,7 +99,10 @@ struct ureg_dst struct pipe_context; struct ureg_program * -ureg_create( unsigned processor ); +ureg_create(unsigned processor); + +struct ureg_program * +ureg_create_with_screen(unsigned processor, struct pipe_screen *screen); const struct tgsi_token * ureg_finalize( struct ureg_program * ); diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 416ef2dada6..8f64817fe5f 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -340,6 +340,8 @@ to be 0. DLDEXP are supported. * ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether FMA and DFMA (doubles only) are supported. 
+* ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't + ignore tgsi_declaration_range::Last for shader inputs and outputs. .. _pipe_compute_cap: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index c596d03b084..6a5748c73ca 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -375,6 +375,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 03fecd1ca64..0590da07b9a 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -165,6 +165,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; default: debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap); diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index bb79ccc538f..2e38a1978ae 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -252,6 +252,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; default: debug_printf("unknown vertex shader param %d\n", param); @@ -292,6 +293,7 @@ 
nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; default: debug_printf("unknown fragment shader param %d\n", param); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index f455a7f91b6..6583a353578 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -291,6 +291,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 1ca997a4913..5936d05a5b9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -297,6 +297,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 1; case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: return 16; /* would be 32 in linked (OpenGL-style) mode */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8e1d7102f0c..a7bca915f57 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -274,6 +274,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: return (is_r500 ? 
256 : 32) * sizeof(float[4]); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_MAX_TEMPS: return is_r500 ? 128 : is_r400 ? 64 : 32; @@ -333,6 +334,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_PREDS: return 0; /* unused */ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 5a8eb068f00..93a6e556b16 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -495,6 +495,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; } return 0; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index b57aa5ff01b..53ae71a8c92 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -452,6 +452,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: return 0; case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; } return 0; diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 549a89aed22..56e486786df 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -377,6 +377,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case 
PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; } /* If we get here, we failed to handle a cap above */ @@ -434,6 +435,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; } /* If we get here, we failed to handle a cap above */ diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 60d917d7520..f63bead0fbb 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -323,6 +323,7 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 8fabf5e0ff7..a077029725c 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -669,6 +669,7 @@ enum pipe_shader_cap PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */ PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED, PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED, + PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE }; /** diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index a9110d3c674..9191cd68416 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -321,7 +321,7 @@ st_translate_vertex_program(struct st_context *st, _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); } - ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); + ureg = ureg_create_with_screen(TGSI_PROCESSOR_VERTEX, 
st->pipe->screen); if (ureg == NULL) { free(vpv); return NULL; @@ -732,7 +732,7 @@ st_translate_fragment_program(struct st_context *st, } } - ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); + ureg = ureg_create_with_screen(TGSI_PROCESSOR_FRAGMENT, st->pipe->screen); if (ureg == NULL) { free(variant); return NULL; @@ -890,7 +890,7 @@ st_translate_geometry_program(struct st_context *st, if (!gpv) return NULL; - ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY); + ureg = ureg_create_with_screen(TGSI_PROCESSOR_GEOMETRY, st->pipe->screen); if (ureg == NULL) { free(gpv); return NULL; From 57c98e22db3397efe42268ba0750f319cea3b0fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 May 2015 15:09:20 +0200 Subject: [PATCH 423/834] glsl_to_tgsi: don't use a static array size for "array_sizes" --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 95968120730..bf7f222e5de 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -317,7 +317,8 @@ public: int next_temp; - unsigned array_sizes[MAX_ARRAYS]; + unsigned *array_sizes; + unsigned max_num_arrays; unsigned next_array; int num_address_regs; @@ -1142,6 +1143,12 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) if (!options->EmitNoIndirectTemp && (type->is_array() || type->is_matrix())) { + if (next_array >= max_num_arrays) { + max_num_arrays += 32; + array_sizes = (unsigned*) + realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays); + } + src.file = PROGRAM_ARRAY; src.index = next_array << 16 | 0x8000; array_sizes[next_array] = type_size(type); @@ -3344,6 +3351,8 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() { result.file = PROGRAM_UNDEFINED; next_temp = 1; + array_sizes = NULL; + max_num_arrays = 0; next_array = 0; next_signature_id = 1; num_immediates = 0; @@ 
-3366,6 +3375,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() { + free(array_sizes); ralloc_free(mem_ctx); } @@ -4374,7 +4384,7 @@ struct st_translate { struct ureg_src samplers[PIPE_MAX_SAMPLERS]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; - unsigned array_sizes[MAX_ARRAYS]; + unsigned *array_sizes; const GLuint *inputMapping; const GLuint *outputMapping; @@ -5277,9 +5287,7 @@ st_translate_program( } } - /* Copy over array sizes - */ - memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array); + t->array_sizes = program->array_sizes; /* Emit constants and uniforms. TGSI uses a single index space for these, * so we put all the translated regs in t->constants. From 6ae3bc256927b583690729b8940a4418e75b0596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 May 2015 15:09:20 +0200 Subject: [PATCH 424/834] glsl_to_tgsi: don't use a static array size for st_translate::arrays --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index bf7f222e5de..7a34e43186f 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -57,11 +57,6 @@ (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) -/** - * Maximum number of arrays - */ -#define MAX_ARRAYS 256 - #define MAX_GLSL_TEXTURE_OFFSET 4 class st_src_reg; @@ -4373,7 +4368,8 @@ struct st_translate { unsigned temps_size; struct ureg_dst *temps; - struct ureg_dst arrays[MAX_ARRAYS]; + struct ureg_dst *arrays; + unsigned num_temp_arrays; struct ureg_src *constants; int num_constants; struct ureg_src *immediates; @@ -4541,7 +4537,7 @@ dst_register(struct st_translate *t, case PROGRAM_ARRAY: array = index >> 16; - assert(array < ARRAY_SIZE(t->arrays)); 
+ assert(array < t->num_temp_arrays); if (ureg_dst_is_undef(t->arrays[array])) t->arrays[array] = ureg_DECL_array_temporary( @@ -4749,7 +4745,7 @@ translate_tex_offset(struct st_translate *t, array = in_offset->index >> 16; assert(array >= 0); - assert(array < (int) ARRAY_SIZE(t->arrays)); + assert(array < (int)t->num_temp_arrays); dst = t->arrays[array]; offset.File = dst.File; @@ -5149,6 +5145,10 @@ st_translate_program( t->inputMapping = inputMapping; t->outputMapping = outputMapping; t->ureg = ureg; + t->num_temp_arrays = program->next_array; + if (t->num_temp_arrays) + t->arrays = (struct ureg_dst*) + calloc(1, sizeof(t->arrays[0]) * t->num_temp_arrays); /* * Declare input attributes. @@ -5383,6 +5383,7 @@ st_translate_program( out: if (t) { + free(t->arrays); free(t->temps); free(t->insn); free(t->labels); From 30b74c02cd57463591588274ad638ca80b34cb57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 May 2015 15:18:27 +0200 Subject: [PATCH 425/834] glsl_to_tgsi: remove memset after calloc --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 7a34e43186f..39251afdce3 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5139,8 +5139,6 @@ st_translate_program( goto out; } - memset(t, 0, sizeof *t); - t->procType = procType; t->inputMapping = inputMapping; t->outputMapping = outputMapping; From 85cd1cf4b88aff9bd2667359e36e6fbb7be92122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 9 May 2015 23:38:52 +0200 Subject: [PATCH 426/834] glsl_to_tgsi: rename emit -> emit_asm My editor thinks "emit" is a keyword, which breaks code indexing. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 428 ++++++++++----------- 1 file changed, 214 insertions(+), 214 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 39251afdce3..ff5969af54c 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -386,31 +386,31 @@ public: /** List of glsl_to_tgsi_instruction */ exec_list instructions; - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); + glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0); + glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_dst_reg dst1, - st_src_reg src0); + glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_dst_reg dst1, + st_src_reg src0); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0, st_src_reg src1); + glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1, st_src_reg src2); + glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, st_src_reg src2); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1, - st_src_reg src2, st_src_reg src3); + glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, + st_src_reg src2, st_src_reg src3); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_dst_reg dst1, - 
st_src_reg src0, st_src_reg src1, - st_src_reg src2, st_src_reg src3); + glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_dst_reg dst1, + st_src_reg src0, st_src_reg src1, + st_src_reg src2, st_src_reg src3); unsigned get_opcode(ir_instruction *ir, unsigned op, st_dst_reg dst, @@ -522,10 +522,10 @@ num_inst_src_regs(unsigned opcode) } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_dst_reg dst1, - st_src_reg src0, st_src_reg src1, - st_src_reg src2, st_src_reg src3) +glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_dst_reg dst1, + st_src_reg src0, st_src_reg src1, + st_src_reg src2, st_src_reg src3) { glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); int num_reladdr = 0, i, j; @@ -712,48 +712,48 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1, - st_src_reg src2, st_src_reg src3) +glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, + st_src_reg src2, st_src_reg src3) { - return emit(ir, op, dst, undef_dst, src0, src1, src2, src3); + return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3); } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0, - st_src_reg src1, st_src_reg src2) +glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0, + st_src_reg src1, st_src_reg src2) { - return emit(ir, op, dst, undef_dst, src0, src1, src2, undef_src); + return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, undef_src); } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0, st_src_reg src1) 
+glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1) { - return emit(ir, op, dst, undef_dst, src0, src1, undef_src, undef_src); + return emit_asm(ir, op, dst, undef_dst, src0, src1, undef_src, undef_src); } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0) +glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0) { assert(dst.writemask != 0); - return emit(ir, op, dst, undef_dst, src0, undef_src, undef_src, undef_src); + return emit_asm(ir, op, dst, undef_dst, src0, undef_src, undef_src, undef_src); } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_dst_reg dst1, st_src_reg src0) +glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_dst_reg dst1, st_src_reg src0) { - return emit(ir, op, dst, dst1, src0, undef_src, undef_src, undef_src); + return emit_asm(ir, op, dst, dst1, src0, undef_src, undef_src, undef_src); } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) +glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op) { - return emit(ir, op, undef_dst, undef_dst, undef_src, undef_src, undef_src, undef_src); + return emit_asm(ir, op, undef_dst, undef_dst, undef_src, undef_src, undef_src, undef_src); } /** @@ -875,7 +875,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 }; - return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); + return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** @@ -925,7 +925,7 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, src1_swiz, src1_swiz); dst.writemask = this_mask; - emit(ir, op, dst, src0, src1); + emit_asm(ir, op, dst, src0, src1); done_mask |= this_mask; } } @@ -954,7 +954,7 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction 
*ir, if (dst.index >= this->num_address_regs) this->num_address_regs = dst.index + 1; - emit(NULL, op, dst, src0); + emit_asm(NULL, op, dst, src0); } int @@ -1244,7 +1244,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) */ st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT); src.swizzle = slots[i].swizzle; - emit(ir, TGSI_OPCODE_MOV, dst, src); + emit_asm(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. */ dst.index++; } @@ -1263,11 +1263,11 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) void glsl_to_tgsi_visitor::visit(ir_loop *ir) { - emit(NULL, TGSI_OPCODE_BGNLOOP); + emit_asm(NULL, TGSI_OPCODE_BGNLOOP); visit_exec_list(&ir->body_instructions, this); - emit(NULL, TGSI_OPCODE_ENDLOOP); + emit_asm(NULL, TGSI_OPCODE_ENDLOOP); } void @@ -1275,10 +1275,10 @@ glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) { switch (ir->mode) { case ir_loop_jump::jump_break: - emit(NULL, TGSI_OPCODE_BRK); + emit_asm(NULL, TGSI_OPCODE_BRK); break; case ir_loop_jump::jump_continue: - emit(NULL, TGSI_OPCODE_CONT); + emit_asm(NULL, TGSI_OPCODE_CONT); break; } } @@ -1332,7 +1332,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) this->result = get_temp(ir->type); result_dst = st_dst_reg(this->result); result_dst.writemask = (1 << ir->type->vector_elements) - 1; - emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); + emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); return true; } @@ -1372,7 +1372,7 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan b.negate = ~b.negate; this->result = get_temp(ir->type); - emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); + emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); return true; } @@ -1390,7 +1390,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, if (*num_reladdr != 1) { st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); + emit_asm(ir, TGSI_OPCODE_MOV, 
st_dst_reg(temp), *reg); *reg = temp; } @@ -1466,7 +1466,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: if (result_dst.type != GLSL_TYPE_FLOAT) - emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); else { /* Previously 'SEQ dst, src, 0.0' was used for this. However, many * older GPUs implement SEQ using multiple instructions (i915 uses two @@ -1474,24 +1474,24 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * 0.0 and 1.0, 1-x also implements !x. */ op[0].negate = ~op[0].negate; - emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); + emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); } break; case ir_unop_neg: if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) - emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]); else if (result_dst.type == GLSL_TYPE_DOUBLE) - emit(ir, TGSI_OPCODE_DNEG, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]); else { op[0].negate = ~op[0].negate; result_src = op[0]; } break; case ir_unop_abs: - emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]); break; case ir_unop_sign: - emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]); break; case ir_unop_rcp: emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); @@ -1515,17 +1515,17 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_saturate: { glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]); + inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); inst->saturate = true; break; } case ir_unop_dFdx: case ir_unop_dFdx_coarse: - emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]); break; case ir_unop_dFdx_fine: - emit(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]); + 
emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]); break; case ir_unop_dFdy: case ir_unop_dFdy_coarse: @@ -1549,18 +1549,18 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]); - emit(ir, ir->operation == ir_unop_dFdy_fine ? + emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]); + emit_asm(ir, ir->operation == ir_unop_dFdy_fine ? TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp); break; } case ir_unop_frexp_sig: - emit(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]); break; case ir_unop_frexp_exp: - emit(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]); break; case ir_unop_noise: { @@ -1570,50 +1570,50 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * place to do this is in the GL state tracker, not the poor * driver. 
*/ - emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); break; } case ir_binop_add: - emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_sub: - emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); break; case ir_binop_mul: - emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_div: if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE) assert(!"not reached: should be handled by ir_div_to_mul_rcp"); else - emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); break; case ir_binop_mod: if (result_dst.type == GLSL_TYPE_FLOAT) assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); else - emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); break; case ir_binop_less: - emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); break; case ir_binop_greater: - emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); break; case ir_binop_lequal: - emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); + emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); break; case ir_binop_gequal: - emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); break; case ir_binop_equal: - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); break; case ir_binop_nequal: - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_all_equal: /* 
"==" operator producing a scalar boolean. */ @@ -1627,7 +1627,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_dst_reg temp_dst = st_dst_reg(temp); st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); - emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); /* Emit 1-3 AND operations to combine the SEQ results. */ switch (ir->operands[0]->type->vector_elements) { @@ -1637,24 +1637,24 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) temp_dst.writemask = WRITEMASK_Y; temp1.swizzle = SWIZZLE_YYYY; temp2.swizzle = SWIZZLE_ZZZZ; - emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); break; case 4: temp_dst.writemask = WRITEMASK_X; temp1.swizzle = SWIZZLE_XXXX; temp2.swizzle = SWIZZLE_YYYY; - emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); temp_dst.writemask = WRITEMASK_Y; temp1.swizzle = SWIZZLE_ZZZZ; temp2.swizzle = SWIZZLE_WWWW; - emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); } temp1.swizzle = SWIZZLE_XXXX; temp2.swizzle = SWIZZLE_YYYY; - emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); } else { - emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); /* After the dot-product, the value will be an integer on the * range [0,4]. Zero becomes 1.0, and positive values become zero. 
@@ -1667,10 +1667,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) */ st_src_reg sge_src = result_src; sge_src.negate = ~sge_src.negate; - emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); } } else { - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); } break; case ir_binop_any_nequal: @@ -1680,7 +1680,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_src_reg temp = get_temp(native_integers ? glsl_type::uvec4_type : glsl_type::vec4_type); - emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); if (native_integers) { st_dst_reg temp_dst = st_dst_reg(temp); @@ -1694,22 +1694,22 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) temp_dst.writemask = WRITEMASK_Y; temp1.swizzle = SWIZZLE_YYYY; temp2.swizzle = SWIZZLE_ZZZZ; - emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); break; case 4: temp_dst.writemask = WRITEMASK_X; temp1.swizzle = SWIZZLE_XXXX; temp2.swizzle = SWIZZLE_YYYY; - emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); temp_dst.writemask = WRITEMASK_Y; temp1.swizzle = SWIZZLE_ZZZZ; temp2.swizzle = SWIZZLE_WWWW; - emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); } temp1.swizzle = SWIZZLE_XXXX; temp2.swizzle = SWIZZLE_YYYY; - emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); } else { /* After the dot-product, the value will be an integer on the * range [0,4]. Zero stays zero, and positive values become 1.0. 
@@ -1728,11 +1728,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) */ st_src_reg slt_src = result_src; slt_src.negate = ~slt_src.negate; - emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); } } } else { - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); } break; @@ -1765,7 +1765,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) GET_SWZ(op0_swizzle, 3), GET_SWZ(op0_swizzle, 3), GET_SWZ(op0_swizzle, 3)); - emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); accum = st_src_reg(result_dst); accum.swizzle = dst_swizzle; /* fallthrough */ @@ -1774,7 +1774,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) GET_SWZ(op0_swizzle, 2), GET_SWZ(op0_swizzle, 2), GET_SWZ(op0_swizzle, 2)); - emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); accum = st_src_reg(result_dst); accum.swizzle = dst_swizzle; /* fallthrough */ @@ -1783,7 +1783,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) GET_SWZ(op0_swizzle, 1), GET_SWZ(op0_swizzle, 1), GET_SWZ(op0_swizzle, 1)); - emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); break; default: assert(!"Unexpected vector size"); @@ -1809,11 +1809,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) */ st_src_reg slt_src = result_src; slt_src.negate = ~slt_src.negate; - emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); } else { /* Use SNE 0 if integers are being used as boolean values. 
*/ - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); } } break; @@ -1821,9 +1821,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_binop_logic_xor: if (native_integers) - emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); else - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_logic_or: { @@ -1832,13 +1832,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * instruction. */ assert(native_integers); - emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); } else { /* After the addition, the value will be an integer on the * range [0,2]. Zero stays zero, and positive values become 1.0. */ glsl_to_tgsi_instruction *add = - emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { /* The clamping to [0,1] can be done for free in the fragment * shader with a saturate if floats are being used as boolean values. @@ -1851,7 +1851,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) */ st_src_reg slt_src = result_src; slt_src.negate = ~slt_src.negate; - emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); } } break; @@ -1863,9 +1863,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * actual AND opcode. 
*/ if (native_integers) - emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); else - emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_dot: @@ -1881,10 +1881,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } else { /* sqrt(x) = x * rsq(x). */ emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); - emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); + emit_asm(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); /* For incoming channels <= 0, set the result to 0. */ op[0].negate = ~op[0].negate; - emit(ir, TGSI_OPCODE_CMP, result_dst, + emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], result_src, st_src_reg_for_float(0.0)); } break; @@ -1893,13 +1893,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_i2f: if (native_integers) { - emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]); break; } /* fallthrough to next case otherwise */ case ir_unop_b2f: if (native_integers) { - emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); break; } /* fallthrough to next case otherwise */ @@ -1914,7 +1914,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * GLSL requires that int(bool) return 1 for true and 0 for false. * This conversion is done with AND, but it could be done with NEG. */ - emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); } else { /* Booleans and integers are both stored as floats when native * integers are disabled. 
@@ -1924,15 +1924,15 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_f2i: if (native_integers) - emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]); else - emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_f2u: if (native_integers) - emit(ir, TGSI_OPCODE_F2U, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]); else - emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_bitcast_f2i: result_src = op[0]; @@ -1948,38 +1948,38 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) result_src.type = GLSL_TYPE_FLOAT; break; case ir_unop_f2b: - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_d2b: - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0)); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0)); break; case ir_unop_i2b: if (native_integers) - emit(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0)); + emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0)); else - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_trunc: - emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_ceil: - emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); break; case ir_unop_floor: - emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]); break; case ir_unop_round_even: - emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); break; case 
ir_unop_fract: - emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]); break; case ir_binop_min: - emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); break; case ir_binop_max: - emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); break; case ir_binop_pow: emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); @@ -1987,37 +1987,37 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_bit_not: if (native_integers) { - emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); break; } case ir_unop_u2f: if (native_integers) { - emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]); break; } case ir_binop_lshift: if (native_integers) { - emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); break; } case ir_binop_rshift: if (native_integers) { - emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); break; } case ir_binop_bit_and: if (native_integers) { - emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); break; } case ir_binop_bit_xor: if (native_integers) { - emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); break; } case ir_binop_bit_or: if (native_integers) { - emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); break; } @@ -2047,7 +2047,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } else { /* Relative/variable index into constant buffer */ - emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], + emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], st_src_reg_for_int(4)); cbuf.reladdr = 
ralloc(mem_ctx, st_src_reg); memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg)); @@ -2080,88 +2080,88 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) const_offset % 16 / 4); if (ir->type->base_type == GLSL_TYPE_BOOL) { - emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0)); + emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0)); } else { - emit(ir, TGSI_OPCODE_MOV, result_dst, cbuf); + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf); } break; } case ir_triop_lrp: /* note: we have to reorder the three args here */ - emit(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]); + emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]); break; case ir_triop_csel: if (this->ctx->Const.NativeIntegers) - emit(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]); + emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]); else { op[0].negate = ~op[0].negate; - emit(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]); + emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]); } break; case ir_triop_bitfield_extract: - emit(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]); + emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]); break; case ir_quadop_bitfield_insert: - emit(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]); + emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]); break; case ir_unop_bitfield_reverse: - emit(ir, TGSI_OPCODE_BREV, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]); break; case ir_unop_bit_count: - emit(ir, TGSI_OPCODE_POPC, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]); break; case ir_unop_find_msb: - emit(ir, TGSI_OPCODE_IMSB, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]); break; case ir_unop_find_lsb: - emit(ir, TGSI_OPCODE_LSB, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]); break; case ir_binop_imul_high: - emit(ir, TGSI_OPCODE_IMUL_HI, 
result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]); break; case ir_triop_fma: /* In theory, MAD is incorrect here. */ if (have_fma) - emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]); + emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]); else - emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); + emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); break; case ir_unop_interpolate_at_centroid: - emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); break; case ir_binop_interpolate_at_offset: - emit(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]); break; case ir_binop_interpolate_at_sample: - emit(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]); break; case ir_unop_d2f: - emit(ir, TGSI_OPCODE_D2F, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]); break; case ir_unop_f2d: - emit(ir, TGSI_OPCODE_F2D, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]); break; case ir_unop_d2i: - emit(ir, TGSI_OPCODE_D2I, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]); break; case ir_unop_i2d: - emit(ir, TGSI_OPCODE_I2D, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]); break; case ir_unop_d2u: - emit(ir, TGSI_OPCODE_D2U, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]); break; case ir_unop_u2d: - emit(ir, TGSI_OPCODE_U2D, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]); break; case ir_unop_unpack_double_2x32: case ir_unop_pack_double_2x32: - emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); break; case ir_binop_ldexp: if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) { - emit(ir, TGSI_OPCODE_DLDEXP, 
result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]); } else { assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()"); } @@ -2343,7 +2343,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) index_reg = get_temp(native_integers ? glsl_type::int_type : glsl_type::float_type); - emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), + emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), this->result, st_src_reg_for_type(index_reg.type, element_size)); } @@ -2354,7 +2354,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) st_src_reg accum_reg = get_temp(native_integers ? glsl_type::int_type : glsl_type::float_type); - emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), index_reg, *src.reladdr); index_reg = accum_reg; @@ -2591,16 +2591,16 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * l_src.swizzle = swizzle_for_size(type->vector_elements); if (native_integers) { - emit(ir, TGSI_OPCODE_UCMP, *l, *cond, + emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond, cond_swap ? l_src : *r, cond_swap ? *r : l_src); } else { - emit(ir, TGSI_OPCODE_CMP, *l, *cond, + emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond, cond_swap ? l_src : *r, cond_swap ? 
*r : l_src); } } else { - emit(ir, TGSI_OPCODE_MOV, *l, *r); + emit_asm(ir, TGSI_OPCODE_MOV, *l, *r); } l->index++; r->index++; @@ -2681,7 +2681,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) */ glsl_to_tgsi_instruction *inst, *new_inst; inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); new_inst->saturate = inst->saturate; inst->dead_mask = inst->dst[0].writemask; } else { @@ -2719,7 +2719,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) src = this->result; for (i = 0; i < (unsigned int)size; i++) { - emit(ir, TGSI_OPCODE_MOV, temp, src); + emit_asm(ir, TGSI_OPCODE_MOV, temp, src); src.index++; temp.index++; @@ -2741,7 +2741,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) ir->array_elements[i]->accept(this); src = this->result; for (int j = 0; j < size; j++) { - emit(ir, TGSI_OPCODE_MOV, temp, src); + emit_asm(ir, TGSI_OPCODE_MOV, temp, src); src.index++; temp.index++; @@ -2766,7 +2766,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) ir->type->vector_elements, GL_FLOAT, &src.swizzle); - emit(ir, TGSI_OPCODE_MOV, mat_column, src); + emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; } @@ -2891,7 +2891,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) l.cond_mask = COND_TR; for (i = 0; i < type_size(param->type); i++) { - emit(ir, TGSI_OPCODE_MOV, l, r); + emit_asm(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -2899,7 +2899,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) } /* Emit call instruction */ - call_inst = emit(ir, TGSI_OPCODE_CAL); + call_inst = emit_asm(ir, TGSI_OPCODE_CAL); call_inst->function = entry; /* Process out parameters. 
*/ @@ -2924,7 +2924,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) st_dst_reg l = st_dst_reg(this->result); for (i = 0; i < type_size(param->type); i++) { - emit(ir, TGSI_OPCODE_MOV, l, r); + emit_asm(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -2967,7 +2967,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) coord = get_temp(glsl_type::vec4_type); coord_dst = st_dst_reg(coord); coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1; - emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); } if (ir->projector) { @@ -3076,7 +3076,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) if (opcode == TGSI_OPCODE_TEX) { /* Slot the projector in as the last component of the coord. */ coord_dst.writemask = WRITEMASK_W; - emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector); coord_dst.writemask = WRITEMASK_XYZW; opcode = TGSI_OPCODE_TXP; } else { @@ -3088,7 +3088,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) * projective divide now. */ coord_dst.writemask = WRITEMASK_W; - emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); + emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector); /* In the case where we have to project the coordinates "by hand," * the shadow comparator value must also be projected. 
@@ -3107,14 +3107,14 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) assert(!sampler_type->sampler_array); tmp_dst.writemask = WRITEMASK_Z; - emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); + emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); tmp_dst.writemask = WRITEMASK_XY; - emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); + emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord); } coord_dst.writemask = WRITEMASK_XYZ; - emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); + emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); coord_dst.writemask = WRITEMASK_XYZW; coord.swizzle = SWIZZLE_XYZW; @@ -3135,7 +3135,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) cube_sc = get_temp(glsl_type::float_type); cube_sc_dst = st_dst_reg(cube_sc); cube_sc_dst.writemask = WRITEMASK_X; - emit(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result); + emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result); cube_sc_dst.writemask = WRITEMASK_X; } else { @@ -3146,20 +3146,20 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } else { coord_dst.writemask = WRITEMASK_Z; } - emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); coord_dst.writemask = WRITEMASK_XYZW; } } if (ir->op == ir_txf_ms) { coord_dst.writemask = WRITEMASK_W; - emit(ir, TGSI_OPCODE_MOV, coord_dst, sample_index); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index); coord_dst.writemask = WRITEMASK_XYZW; } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || opcode == TGSI_OPCODE_TXF) { /* TGSI stores LOD or LOD bias in the last channel of the coords. 
*/ coord_dst.writemask = WRITEMASK_W; - emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); coord_dst.writemask = WRITEMASK_XYZW; } @@ -3169,30 +3169,30 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } if (opcode == TGSI_OPCODE_TXD) - inst = emit(ir, opcode, result_dst, coord, dx, dy); + inst = emit_asm(ir, opcode, result_dst, coord, dx, dy); else if (opcode == TGSI_OPCODE_TXQ) { if (ir->op == ir_query_levels) { /* the level is stored in W */ - inst = emit(ir, opcode, st_dst_reg(levels_src), lod_info); + inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info); result_dst.writemask = WRITEMASK_X; levels_src.swizzle = SWIZZLE_WWWW; - emit(ir, TGSI_OPCODE_MOV, result_dst, levels_src); + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src); } else - inst = emit(ir, opcode, result_dst, lod_info); + inst = emit_asm(ir, opcode, result_dst, lod_info); } else if (opcode == TGSI_OPCODE_TXF) { - inst = emit(ir, opcode, result_dst, coord); + inst = emit_asm(ir, opcode, result_dst, coord); } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) { - inst = emit(ir, opcode, result_dst, coord, lod_info); + inst = emit_asm(ir, opcode, result_dst, coord, lod_info); } else if (opcode == TGSI_OPCODE_TEX2) { - inst = emit(ir, opcode, result_dst, coord, cube_sc); + inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); } else if (opcode == TGSI_OPCODE_TG4) { if (is_cube_array && ir->shadow_comparitor) { - inst = emit(ir, opcode, result_dst, coord, cube_sc); + inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); } else { - inst = emit(ir, opcode, result_dst, coord, component); + inst = emit_asm(ir, opcode, result_dst, coord, component); } } else - inst = emit(ir, opcode, result_dst, coord); + inst = emit_asm(ir, opcode, result_dst, coord); if (ir->shadow_comparitor) inst->tex_shadow = GL_TRUE; @@ -3266,13 +3266,13 @@ glsl_to_tgsi_visitor::visit(ir_return *ir) l = st_dst_reg(current_function->return_reg); for (i = 
0; i < type_size(current_function->sig->return_type); i++) { - emit(ir, TGSI_OPCODE_MOV, l, r); + emit_asm(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } } - emit(ir, TGSI_OPCODE_RET); + emit_asm(ir, TGSI_OPCODE_RET); } void @@ -3285,16 +3285,16 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) /* Convert the bool condition to a float so we can negate. */ if (native_integers) { st_src_reg temp = get_temp(ir->condition->type); - emit(ir, TGSI_OPCODE_AND, st_dst_reg(temp), + emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp), condition, st_src_reg_for_float(1.0)); condition = temp; } condition.negate = ~condition.negate; - emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition); + emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition); } else { /* unconditional kil */ - emit(ir, TGSI_OPCODE_KILL); + emit_asm(ir, TGSI_OPCODE_KILL); } } @@ -3309,18 +3309,18 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF; - if_inst = emit(ir->condition, if_opcode, undef_dst, this->result); + if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result); this->instructions.push_tail(if_inst); visit_exec_list(&ir->then_instructions, this); if (!ir->else_instructions.is_empty()) { - emit(ir->condition, TGSI_OPCODE_ELSE); + emit_asm(ir->condition, TGSI_OPCODE_ELSE); visit_exec_list(&ir->else_instructions, this); } - if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); + if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF); } @@ -3330,7 +3330,7 @@ glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir) assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); ir->stream->accept(this); - emit(ir, TGSI_OPCODE_EMIT, undef_dst, this->result); + emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result); } void @@ -3339,7 +3339,7 @@ glsl_to_tgsi_visitor::visit(ir_end_primitive *ir) assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); ir->stream->accept(this); - emit(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result); + emit_asm(ir, TGSI_OPCODE_ENDPRIM, 
undef_dst, this->result); } glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() @@ -4182,7 +4182,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type); src0 = v->get_temp(glsl_type::vec4_type); dst0 = st_dst_reg(src0); - inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord); inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; @@ -4206,7 +4206,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, /* MAD colorTemp, colorTemp, scale, bias; */ scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); - inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); + inst = v->emit_asm(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); } if (pixel_maps) { @@ -4222,7 +4222,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ temp_dst.writemask = WRITEMASK_XY; /* write R,G */ - inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0); inst->sampler.index = 1; inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; @@ -4230,7 +4230,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ - inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0); inst->sampler.index = 1; inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; @@ -4239,7 +4239,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->samplers_used |= (1 << 1); /* MOV colorTemp, temp; */ - inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); + inst = v->emit_asm(NULL, TGSI_OPCODE_MOV, dst0, temp); } /* 
Now copy the instructions from the original glsl_to_tgsi_visitor into the @@ -4262,7 +4262,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } - newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]); + newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]); newinst->tex_target = inst->tex_target; newinst->sampler_array_size = inst->sampler_array_size; } @@ -4312,7 +4312,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type); src0 = v->get_temp(glsl_type::vec4_type); dst0 = st_dst_reg(src0); - inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord); inst->sampler.index = samplerIndex; inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; @@ -4325,7 +4325,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, src0.negate = NEGATE_XYZW; if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) src0.swizzle = SWIZZLE_XXXX; - inst = v->emit(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0); + inst = v->emit_asm(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0); /* Now copy the instructions from the original glsl_to_tgsi_visitor into the * new visitor. 
*/ @@ -4342,7 +4342,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } - newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]); + newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]); newinst->tex_target = inst->tex_target; newinst->sampler_array_size = inst->sampler_array_size; } @@ -5477,7 +5477,7 @@ get_mesa_program(struct gl_context *ctx, if (!entry->bgn_inst) { v->current_function = entry; - entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); + entry->bgn_inst = v->emit_asm(NULL, TGSI_OPCODE_BGNSUB); entry->bgn_inst->function = entry; visit_exec_list(&entry->sig->body, v); @@ -5485,10 +5485,10 @@ get_mesa_program(struct gl_context *ctx, glsl_to_tgsi_instruction *last; last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); if (last->op != TGSI_OPCODE_RET) - v->emit(NULL, TGSI_OPCODE_RET); + v->emit_asm(NULL, TGSI_OPCODE_RET); glsl_to_tgsi_instruction *end; - end = v->emit(NULL, TGSI_OPCODE_ENDSUB); + end = v->emit_asm(NULL, TGSI_OPCODE_ENDSUB); end->function = entry; progress = GL_TRUE; @@ -5520,7 +5520,7 @@ get_mesa_program(struct gl_context *ctx, v->renumber_registers(); /* Write the END instruction. 
*/ - v->emit(NULL, TGSI_OPCODE_END); + v->emit_asm(NULL, TGSI_OPCODE_END); if (ctx->_Shader->Flags & GLSL_DUMP) { _mesa_log("\n"); From 26c8a49bc40839298d5cd986181d4bee31a48936 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 9 May 2015 23:54:35 +0200 Subject: [PATCH 427/834] glsl_to_tgsi: remove some emit functions by using C++ default values --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 73 ++++------------------ 1 file changed, 12 insertions(+), 61 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ff5969af54c..f1479f21b79 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -297,6 +297,9 @@ public: st_src_reg return_reg; }; +static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); +static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); + struct glsl_to_tgsi_visitor : public ir_visitor { public: glsl_to_tgsi_visitor(); @@ -386,31 +389,19 @@ public: /** List of glsl_to_tgsi_instruction */ exec_list instructions; - glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op); - glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0); + st_dst_reg dst = undef_dst, + st_src_reg src0 = undef_src, + st_src_reg src1 = undef_src, + st_src_reg src2 = undef_src, + st_src_reg src3 = undef_src); glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, st_dst_reg dst, st_dst_reg dst1, - st_src_reg src0); - - glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0, st_src_reg src1); - - glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1, st_src_reg src2); - - glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1, - 
st_src_reg src2, st_src_reg src3); - - glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_dst_reg dst1, - st_src_reg src0, st_src_reg src1, - st_src_reg src2, st_src_reg src3); + st_src_reg src0 = undef_src, + st_src_reg src1 = undef_src, + st_src_reg src2 = undef_src, + st_src_reg src3 = undef_src); unsigned get_opcode(ir_instruction *ir, unsigned op, st_dst_reg dst, @@ -464,10 +455,6 @@ public: void *mem_ctx; }; -static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); - -static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); - static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0); static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1); static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2); @@ -720,42 +707,6 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3); } -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0, - st_src_reg src1, st_src_reg src2) -{ - return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, undef_src); -} - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0, st_src_reg src1) -{ - return emit_asm(ir, op, dst, undef_dst, src0, src1, undef_src, undef_src); -} - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0) -{ - assert(dst.writemask != 0); - return emit_asm(ir, op, dst, undef_dst, src0, undef_src, undef_src, undef_src); -} - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_dst_reg dst1, st_src_reg src0) -{ - return emit_asm(ir, op, dst, dst1, src0, undef_src, 
undef_src, undef_src); -} - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op) -{ - return emit_asm(ir, op, undef_dst, undef_dst, undef_src, undef_src, undef_src, undef_src); -} - /** * Determines whether to use an integer, unsigned integer, or float opcode * based on the operands and input opcode, then emits the result. From 9b1921100ef5f265403b278fab26ae404db719da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 May 2015 13:03:27 +0200 Subject: [PATCH 428/834] glsl_to_tgsi: use TGSI array declarations for GS,FS arrays of inputs (v2) v2: don't use PIPE_MAX_SHADER_ARRAYS --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 216 ++++++++++++++++++--- src/mesa/state_tracker/st_glsl_to_tgsi.h | 1 + src/mesa/state_tracker/st_program.c | 9 + 3 files changed, 202 insertions(+), 24 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f1479f21b79..ac26762302e 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -84,6 +84,7 @@ public: this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; + this->array_id = 0; } st_src_reg(gl_register_file file, int index, int type) @@ -98,6 +99,7 @@ public: this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; + this->array_id = 0; } st_src_reg(gl_register_file file, int index, int type, int index2D) @@ -112,6 +114,7 @@ public: this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; + this->array_id = 0; } st_src_reg() @@ -126,6 +129,7 @@ public: this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; + this->array_id = 0; } explicit st_src_reg(st_dst_reg reg); @@ -145,6 +149,7 @@ public: * currently used for input mapping only. 
*/ bool double_reg2; + unsigned array_id; }; class st_dst_reg { @@ -202,6 +207,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; + this->array_id = 0; } st_dst_reg::st_dst_reg(st_src_reg reg) @@ -212,6 +218,7 @@ st_dst_reg::st_dst_reg(st_src_reg reg) this->writemask = WRITEMASK_XYZW; this->cond_mask = COND_TR; this->reladdr = reg.reladdr; + assert(reg.array_id == 0); } class glsl_to_tgsi_instruction : public exec_node { @@ -239,8 +246,9 @@ public: class variable_storage : public exec_node { public: - variable_storage(ir_variable *var, gl_register_file file, int index) - : file(file), index(index), var(var) + variable_storage(ir_variable *var, gl_register_file file, int index, + unsigned array_id = 0) + : file(file), index(index), var(var), array_id(array_id) { /* empty */ } @@ -248,6 +256,7 @@ public: gl_register_file file; int index; ir_variable *var; /* variable that maps to this, if any */ + unsigned array_id; }; class immediate_storage : public exec_node { @@ -300,6 +309,12 @@ public: static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); +struct array_decl { + unsigned mesa_index; + unsigned array_id; + unsigned array_size; +}; + struct glsl_to_tgsi_visitor : public ir_visitor { public: glsl_to_tgsi_visitor(); @@ -319,6 +334,9 @@ public: unsigned max_num_arrays; unsigned next_array; + struct array_decl input_arrays[PIPE_MAX_SHADER_INPUTS]; + unsigned num_input_arrays; + int num_address_regs; int samplers_used; bool indirect_addr_consts; @@ -2196,11 +2214,38 @@ glsl_to_tgsi_visitor::visit(ir_swizzle *ir) this->result = src; } +/* Test if the variable is an array. Note that geometry and + * tessellation shader inputs are outputs are always arrays (except + * for patch inputs), so only the array element type is considered. 
+ */ +static bool +is_inout_array(unsigned stage, ir_variable *var, bool *is_2d) +{ + const glsl_type *type = var->type; + + if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) || + (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out)) + return false; + + *is_2d = false; + + if (stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) { + if (!var->type->is_array()) + return false; /* a system value probably */ + + type = var->type->fields.array; + *is_2d = true; + } + + return type->is_array() || type->is_matrix(); +} + void glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) { variable_storage *entry = find_variable_storage(ir->var); ir_variable *var = ir->var; + bool is_2d; if (!entry) { switch (var->data.mode) { @@ -2216,9 +2261,29 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) * user-defined varyings. */ assert(var->data.location != -1); - entry = new(mem_ctx) variable_storage(var, - PROGRAM_INPUT, - var->data.location); + + if (is_inout_array(shader->Stage, var, &is_2d)) { + struct array_decl *decl = &input_arrays[num_input_arrays]; + + decl->mesa_index = var->data.location; + decl->array_id = num_input_arrays + 1; + if (is_2d) + decl->array_size = type_size(var->type->fields.array); + else + decl->array_size = type_size(var->type); + num_input_arrays++; + + entry = new(mem_ctx) variable_storage(var, + PROGRAM_INPUT, + var->data.location, + decl->array_id); + } + else { + entry = new(mem_ctx) variable_storage(var, + PROGRAM_INPUT, + var->data.location); + } + this->variables.push_tail(entry); break; case ir_var_shader_out: assert(var->data.location != -1); @@ -2249,10 +2314,43 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) } this->result = st_src_reg(entry->file, entry->index, var->type); + this->result.array_id = entry->array_id; if (!native_integers) this->result.type = GLSL_TYPE_FLOAT; } +static void +shrink_array_declarations(struct array_decl *arrays, unsigned count, + 
GLbitfield64 usage_mask) +{ + unsigned i, j; + + /* Fix array declarations by removing unused array elements at both ends + * of the arrays. For example, mat4[3] where only mat[1] is used. + */ + for (i = 0; i < count; i++) { + struct array_decl *decl = &arrays[i]; + + /* Shrink the beginning. */ + for (j = 0; j < decl->array_size; j++) { + if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) + break; + + decl->mesa_index++; + decl->array_size--; + j--; + } + + /* Shrink the end. */ + for (j = decl->array_size-1; j >= 0; j--) { + if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) + break; + + decl->array_size--; + } + } +} + void glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) { @@ -3300,6 +3398,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() array_sizes = NULL; max_num_arrays = 0; next_array = 0; + num_input_arrays = 0; next_signature_id = 1; num_immediates = 0; current_function = NULL; @@ -3690,6 +3789,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) inst->src[r].index2D = first->src[0].index2D; inst->src[r].has_index2 = first->src[0].has_index2; inst->src[r].double_reg2 = first->src[0].double_reg2; + inst->src[r].array_id = first->src[0].array_id; int swizzle = 0; for (int i = 0; i < 4; i++) { @@ -4332,6 +4432,7 @@ struct st_translate { struct ureg_src systemValues[SYSTEM_VALUE_MAX]; struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned *array_sizes; + struct array_decl *input_arrays; const GLuint *inputMapping; const GLuint *outputMapping; @@ -4556,8 +4657,20 @@ src_register(struct st_translate *t, const st_src_reg *reg) * map back to the original index and add the offset after * mapping. 
*/ index -= double_reg2; - assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); - return t->inputs[t->inputMapping[index] + double_reg2]; + if (!reg->array_id) { + assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); + assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL); + return t->inputs[t->inputMapping[index]]; + } + else { + struct array_decl *decl = &t->input_arrays[reg->array_id-1]; + unsigned mesa_index = decl->mesa_index; + int slot = t->inputMapping[mesa_index]; + + assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT); + assert(t->inputs[slot].ArrayID == reg->array_id); + return ureg_src_array_offset(t->inputs[slot], index - mesa_index); + } case PROGRAM_OUTPUT: assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs)); @@ -5018,6 +5131,25 @@ emit_edgeflags(struct st_translate *t) ureg_MOV(ureg, edge_dst, edge_src); } +static bool +find_array(unsigned attr, struct array_decl *arrays, unsigned count, + unsigned *array_id, unsigned *array_size) +{ + unsigned i; + + for (i = 0; i < count; i++) { + struct array_decl *decl = &arrays[i]; + + if (attr == decl->mesa_index) { + *array_id = decl->array_id; + *array_size = decl->array_size; + assert(*array_size); + return true; + } + } + return false; +} + /** * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. * \param program the program to translate @@ -5047,6 +5179,7 @@ st_translate_program( const struct gl_program *proginfo, GLuint numInputs, const GLuint inputMapping[], + const GLuint inputSlotToAttr[], const ubyte inputSemanticName[], const ubyte inputSemanticIndex[], const GLuint interpMode[], @@ -5102,15 +5235,57 @@ st_translate_program( /* * Declare input attributes. 
*/ - if (procType == TGSI_PROCESSOR_FRAGMENT) { + switch (procType) { + case TGSI_PROCESSOR_FRAGMENT: for (i = 0; i < numInputs; i++) { - t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, - inputSemanticName[i], - inputSemanticIndex[i], - interpMode[i], 0, - interpLocation[i], 0, 1); - } + unsigned array_id = 0; + unsigned array_size; + if (find_array(inputSlotToAttr[i], program->input_arrays, + program->num_input_arrays, &array_id, &array_size)) { + /* We've found an array. Declare it so. */ + t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, + inputSemanticName[i], inputSemanticIndex[i], + interpMode[i], 0, interpLocation[i], + array_id, array_size); + i += array_size - 1; + } + else { + t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, + inputSemanticName[i], inputSemanticIndex[i], + interpMode[i], 0, interpLocation[i], 0, 1); + } + } + break; + case TGSI_PROCESSOR_GEOMETRY: + for (i = 0; i < numInputs; i++) { + unsigned array_id = 0; + unsigned array_size; + + if (find_array(inputSlotToAttr[i], program->input_arrays, + program->num_input_arrays, &array_id, &array_size)) { + /* We've found an array. Declare it so. */ + t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i], + inputSemanticIndex[i], + array_id, array_size); + i += array_size - 1; + } + else { + t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i], + inputSemanticIndex[i], 0, 1); + } + } + break; + case TGSI_PROCESSOR_VERTEX: + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_vs_input(ureg, i); + } + break; + default: + assert(0); + } + + if (procType == TGSI_PROCESSOR_FRAGMENT) { if (proginfo->InputsRead & VARYING_BIT_POS) { /* Must do this after setting up t->inputs. 
*/ emit_wpos(st_context(ctx), t, proginfo, ureg, @@ -5159,12 +5334,6 @@ st_translate_program( } } else if (procType == TGSI_PROCESSOR_GEOMETRY) { - for (i = 0; i < numInputs; i++) { - t->inputs[i] = ureg_DECL_input(ureg, - inputSemanticName[i], - inputSemanticIndex[i], 0, 1); - } - for (i = 0; i < numOutputs; i++) { t->outputs[i] = ureg_DECL_output(ureg, outputSemanticName[i], @@ -5174,10 +5343,6 @@ st_translate_program( else { assert(procType == TGSI_PROCESSOR_VERTEX); - for (i = 0; i < numInputs; i++) { - t->inputs[i] = ureg_DECL_vs_input(ureg, i); - } - for (i = 0; i < numOutputs; i++) { t->outputs[i] = ureg_DECL_output(ureg, outputSemanticName[i], @@ -5237,6 +5402,7 @@ st_translate_program( } t->array_sizes = program->array_sizes; + t->input_arrays = program->input_arrays; /* Emit constants and uniforms. TGSI uses a single index space for these, * so we put all the translated regs in t->constants. @@ -5486,6 +5652,8 @@ get_mesa_program(struct gl_context *ctx, prog->NumInstructions = 0; do_set_program_inouts(shader->ir, prog, shader->Stage); + shrink_array_declarations(v->input_arrays, v->num_input_arrays, + prog->InputsRead); count_resources(v, prog); /* This must be done before the uniform storage is associated. 
*/ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index 2cb80bcf961..d7536350394 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -43,6 +43,7 @@ enum pipe_error st_translate_program( const struct gl_program *proginfo, GLuint numInputs, const GLuint inputMapping[], + const GLuint inputSlotToAttr[], const ubyte inputSemanticName[], const ubyte inputSemanticIndex[], const GLuint interpMode[], diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 9191cd68416..02d8689092a 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -351,6 +351,7 @@ st_translate_vertex_program(struct st_context *st, /* inputs */ vpv->num_inputs, stvp->input_to_index, + NULL, /* inputSlotToAttr */ NULL, /* input semantic name */ NULL, /* input semantic index */ NULL, /* interp mode */ @@ -482,6 +483,7 @@ st_translate_fragment_program(struct st_context *st, GLuint outputMapping[FRAG_RESULT_MAX]; GLuint inputMapping[VARYING_SLOT_MAX]; + GLuint inputSlotToAttr[VARYING_SLOT_MAX]; GLuint interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? 
*/ GLuint interpLocation[PIPE_MAX_SHADER_INPUTS]; GLuint attr; @@ -502,6 +504,7 @@ st_translate_fragment_program(struct st_context *st, return NULL; assert(!(key->bitmap && key->drawpixels)); + memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr)); if (key->bitmap) { /* glBitmap drawing */ @@ -543,6 +546,7 @@ st_translate_fragment_program(struct st_context *st, const GLuint slot = fs_num_inputs++; inputMapping[attr] = slot; + inputSlotToAttr[slot] = attr; if (stfp->Base.IsCentroid & BITFIELD64_BIT(attr)) interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTROID; else if (stfp->Base.IsSample & BITFIELD64_BIT(attr)) @@ -778,6 +782,7 @@ st_translate_fragment_program(struct st_context *st, /* inputs */ fs_num_inputs, inputMapping, + inputSlotToAttr, input_semantic_name, input_semantic_index, interpMode, @@ -867,6 +872,7 @@ st_translate_geometry_program(struct st_context *st, struct st_geometry_program *stgp, const struct st_gp_variant_key *key) { + GLuint inputSlotToAttr[VARYING_SLOT_MAX]; GLuint inputMapping[VARYING_SLOT_MAX]; GLuint outputMapping[VARYING_SLOT_MAX]; struct pipe_context *pipe = st->pipe; @@ -896,6 +902,7 @@ st_translate_geometry_program(struct st_context *st, return NULL; } + memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr)); memset(inputMapping, 0, sizeof(inputMapping)); memset(outputMapping, 0, sizeof(outputMapping)); @@ -907,6 +914,7 @@ st_translate_geometry_program(struct st_context *st, const GLuint slot = gs_num_inputs++; inputMapping[attr] = slot; + inputSlotToAttr[slot] = attr; switch (attr) { case VARYING_SLOT_PRIMITIVE_ID: @@ -1080,6 +1088,7 @@ st_translate_geometry_program(struct st_context *st, /* inputs */ gs_num_inputs, inputMapping, + inputSlotToAttr, input_semantic_name, input_semantic_index, NULL, From 6bf3729a3fa32a779e9593ffdf1c14e7a4f6dbad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 20 May 2015 11:43:55 +0200 Subject: [PATCH 429/834] glsl_to_tgsi: use TGSI array declarations for VS,GS arrays of outputs 
(v2) v2: don't use PIPE_MAX_SHADER_ARRAYS --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 135 ++++++++++++++------- src/mesa/state_tracker/st_glsl_to_tgsi.h | 1 + src/mesa/state_tracker/st_program.c | 7 ++ src/mesa/state_tracker/st_program.h | 1 + 4 files changed, 103 insertions(+), 41 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ac26762302e..0e60d95c575 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -162,6 +162,7 @@ public: this->cond_mask = COND_TR; this->reladdr = NULL; this->type = type; + this->array_id = 0; } st_dst_reg(gl_register_file file, int writemask, int type) @@ -172,6 +173,7 @@ public: this->cond_mask = COND_TR; this->reladdr = NULL; this->type = type; + this->array_id = 0; } st_dst_reg() @@ -182,6 +184,7 @@ public: this->writemask = 0; this->cond_mask = COND_TR; this->reladdr = NULL; + this->array_id = 0; } explicit st_dst_reg(st_src_reg reg); @@ -193,6 +196,7 @@ public: int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. 
*/ st_src_reg *reladdr; + unsigned array_id; }; st_src_reg::st_src_reg(st_dst_reg reg) @@ -207,7 +211,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; - this->array_id = 0; + this->array_id = reg.array_id; } st_dst_reg::st_dst_reg(st_src_reg reg) @@ -218,7 +222,7 @@ st_dst_reg::st_dst_reg(st_src_reg reg) this->writemask = WRITEMASK_XYZW; this->cond_mask = COND_TR; this->reladdr = reg.reladdr; - assert(reg.array_id == 0); + this->array_id = reg.array_id; } class glsl_to_tgsi_instruction : public exec_node { @@ -336,6 +340,8 @@ public: struct array_decl input_arrays[PIPE_MAX_SHADER_INPUTS]; unsigned num_input_arrays; + struct array_decl output_arrays[PIPE_MAX_SHADER_OUTPUTS]; + unsigned num_output_arrays; int num_address_regs; int samplers_used; @@ -2287,10 +2293,30 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) break; case ir_var_shader_out: assert(var->data.location != -1); - entry = new(mem_ctx) variable_storage(var, - PROGRAM_OUTPUT, - var->data.location - + var->data.index); + + if (is_inout_array(shader->Stage, var, &is_2d)) { + struct array_decl *decl = &output_arrays[num_output_arrays]; + + decl->mesa_index = var->data.location; + decl->array_id = num_output_arrays + 1; + if (is_2d) + decl->array_size = type_size(var->type->fields.array); + else + decl->array_size = type_size(var->type); + num_output_arrays++; + + entry = new(mem_ctx) variable_storage(var, + PROGRAM_OUTPUT, + var->data.location, + decl->array_id); + } + else { + entry = new(mem_ctx) variable_storage(var, + PROGRAM_OUTPUT, + var->data.location + + var->data.index); + } + this->variables.push_tail(entry); break; case ir_var_system_value: entry = new(mem_ctx) variable_storage(var, @@ -3399,6 +3425,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() max_num_arrays = 0; next_array = 0; num_input_arrays = 0; + num_output_arrays = 0; next_signature_id = 1; num_immediates = 0; current_function = NULL; @@ -4433,6 +4460,7 
@@ struct st_translate { struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned *array_sizes; struct array_decl *input_arrays; + struct array_decl *output_arrays; const GLuint *inputMapping; const GLuint *outputMapping; @@ -4556,9 +4584,8 @@ emit_immediate(struct st_translate *t, * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. */ static struct ureg_dst -dst_register(struct st_translate *t, - gl_register_file file, - GLuint index) +dst_register(struct st_translate *t, gl_register_file file, unsigned index, + unsigned array_id) { unsigned array; @@ -4599,16 +4626,25 @@ dst_register(struct st_translate *t, (int)(index & 0xFFFF) - 0x8000); case PROGRAM_OUTPUT: - if (t->procType == TGSI_PROCESSOR_VERTEX) - assert(index < VARYING_SLOT_MAX); - else if (t->procType == TGSI_PROCESSOR_FRAGMENT) - assert(index < FRAG_RESULT_MAX); - else - assert(index < VARYING_SLOT_MAX); + if (!array_id) { + if (t->procType == TGSI_PROCESSOR_FRAGMENT) + assert(index < FRAG_RESULT_MAX); + else + assert(index < VARYING_SLOT_MAX); - assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); + assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); + assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL); + return t->outputs[t->outputMapping[index]]; + } + else { + struct array_decl *decl = &t->output_arrays[array_id-1]; + unsigned mesa_index = decl->mesa_index; + int slot = t->outputMapping[mesa_index]; - return t->outputs[t->outputMapping[index]]; + assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT); + assert(t->outputs[slot].ArrayID == array_id); + return ureg_dst_array_offset(t->outputs[slot], index - mesa_index); + } case PROGRAM_ADDRESS: return t->address[index]; @@ -4634,7 +4670,8 @@ src_register(struct st_translate *t, const st_src_reg *reg) case PROGRAM_TEMPORARY: case PROGRAM_ARRAY: - return ureg_src(dst_register(t, reg->file, reg->index)); + case PROGRAM_OUTPUT: + return ureg_src(dst_register(t, reg->file, reg->index, 
reg->array_id)); case PROGRAM_UNIFORM: assert(reg->index >= 0); @@ -4672,10 +4709,6 @@ src_register(struct st_translate *t, const st_src_reg *reg) return ureg_src_array_offset(t->inputs[slot], index - mesa_index); } - case PROGRAM_OUTPUT: - assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs)); - return ureg_src(t->outputs[t->outputMapping[reg->index]]); /* not needed? */ - case PROGRAM_ADDRESS: return ureg_src(t->address[reg->index]); @@ -4697,9 +4730,8 @@ translate_dst(struct st_translate *t, const st_dst_reg *dst_reg, bool saturate, bool clamp_color) { - struct ureg_dst dst = dst_register(t, - dst_reg->file, - dst_reg->index); + struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index, + dst_reg->array_id); if (dst.File == TGSI_FILE_NULL) return dst; @@ -5186,6 +5218,7 @@ st_translate_program( const GLuint interpLocation[], GLuint numOutputs, const GLuint outputMapping[], + const GLuint outputSlotToAttr[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], boolean passthrough_edgeflags, @@ -5285,6 +5318,38 @@ st_translate_program( assert(0); } + /* + * Declare output attributes. + */ + switch (procType) { + case TGSI_PROCESSOR_FRAGMENT: + break; + case TGSI_PROCESSOR_GEOMETRY: + case TGSI_PROCESSOR_VERTEX: + for (i = 0; i < numOutputs; i++) { + unsigned array_id = 0; + unsigned array_size; + + if (find_array(outputSlotToAttr[i], program->output_arrays, + program->num_output_arrays, &array_id, &array_size)) { + /* We've found an array. Declare it so. */ + t->outputs[i] = ureg_DECL_output_array(ureg, + outputSemanticName[i], + outputSemanticIndex[i], + array_id, array_size); + i += array_size - 1; + } + else { + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); + } + } + break; + default: + assert(0); + } + if (procType == TGSI_PROCESSOR_FRAGMENT) { if (proginfo->InputsRead & VARYING_BIT_POS) { /* Must do this after setting up t->inputs. 
*/ @@ -5295,9 +5360,6 @@ st_translate_program( if (proginfo->InputsRead & VARYING_BIT_FACE) emit_face_var(ctx, t); - /* - * Declare output attributes. - */ for (i = 0; i < numOutputs; i++) { switch (outputSemanticName[i]) { case TGSI_SEMANTIC_POSITION: @@ -5333,20 +5395,8 @@ st_translate_program( } } } - else if (procType == TGSI_PROCESSOR_GEOMETRY) { + else if (procType == TGSI_PROCESSOR_VERTEX) { for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output(ureg, - outputSemanticName[i], - outputSemanticIndex[i]); - } - } - else { - assert(procType == TGSI_PROCESSOR_VERTEX); - - for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output(ureg, - outputSemanticName[i], - outputSemanticIndex[i]); if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) { /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */ ureg_MOV(ureg, @@ -5403,6 +5453,7 @@ st_translate_program( t->array_sizes = program->array_sizes; t->input_arrays = program->input_arrays; + t->output_arrays = program->output_arrays; /* Emit constants and uniforms. TGSI uses a single index space for these, * so we put all the translated regs in t->constants. @@ -5654,6 +5705,8 @@ get_mesa_program(struct gl_context *ctx, do_set_program_inouts(shader->ir, prog, shader->Stage); shrink_array_declarations(v->input_arrays, v->num_input_arrays, prog->InputsRead); + shrink_array_declarations(v->output_arrays, v->num_output_arrays, + prog->OutputsWritten); count_resources(v, prog); /* This must be done before the uniform storage is associated. 
*/ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index d7536350394..4af747fa9de 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -50,6 +50,7 @@ enum pipe_error st_translate_program( const GLuint interpLocation[], GLuint numOutputs, const GLuint outputMapping[], + const GLuint outputSlotToAttr[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], boolean passthrough_edgeflags, diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 02d8689092a..35faa7bb4d0 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -215,6 +215,7 @@ st_prepare_vertex_program(struct gl_context *ctx, unsigned slot = stvp->num_outputs++; stvp->result_to_output[attr] = slot; + stvp->output_slot_to_attr[slot] = attr; switch (attr) { case VARYING_SLOT_POS: @@ -359,6 +360,7 @@ st_translate_vertex_program(struct st_context *st, /* outputs */ num_outputs, stvp->result_to_output, + stvp->output_slot_to_attr, stvp->output_semantic_name, stvp->output_semantic_index, key->passthrough_edgeflags, @@ -790,6 +792,7 @@ st_translate_fragment_program(struct st_context *st, /* outputs */ fs_num_outputs, outputMapping, + NULL, fs_output_semantic_name, fs_output_semantic_index, FALSE, key->clamp_color ); @@ -874,6 +877,7 @@ st_translate_geometry_program(struct st_context *st, { GLuint inputSlotToAttr[VARYING_SLOT_MAX]; GLuint inputMapping[VARYING_SLOT_MAX]; + GLuint outputSlotToAttr[VARYING_SLOT_MAX]; GLuint outputMapping[VARYING_SLOT_MAX]; struct pipe_context *pipe = st->pipe; GLuint attr; @@ -904,6 +908,7 @@ st_translate_geometry_program(struct st_context *st, memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr)); memset(inputMapping, 0, sizeof(inputMapping)); + memset(outputSlotToAttr, 0, sizeof(outputSlotToAttr)); memset(outputMapping, 0, sizeof(outputMapping)); /* @@ -993,6 +998,7 @@ 
st_translate_geometry_program(struct st_context *st, GLuint slot = gs_num_outputs++; outputMapping[attr] = slot; + outputSlotToAttr[slot] = attr; switch (attr) { case VARYING_SLOT_POS: @@ -1096,6 +1102,7 @@ st_translate_geometry_program(struct st_context *st, /* outputs */ gs_num_outputs, outputMapping, + outputSlotToAttr, gs_output_semantic_name, gs_output_semantic_index, FALSE, diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index a2c56062d6e..bb77eb6ed65 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -163,6 +163,7 @@ struct st_vertex_program /** Maps VARYING_SLOT_x to slot */ GLuint result_to_output[VARYING_SLOT_MAX]; + GLuint output_slot_to_attr[VARYING_SLOT_MAX]; ubyte output_semantic_name[VARYING_SLOT_MAX]; ubyte output_semantic_index[VARYING_SLOT_MAX]; GLuint num_outputs; From 6aff87bb01d2bd583ac629d02ebf56ecbf86ec2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 May 2015 19:07:35 +0200 Subject: [PATCH 430/834] r600g: fix a coverity defect in streamout code Reported by Ilia Mirkin. --- src/gallium/drivers/r600/r600_shader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 87b6e6e06ec..2e83143b29d 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1337,7 +1337,7 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output int i, j, r; /* Sanity checking. 
*/ - if (so->num_outputs > PIPE_MAX_SHADER_OUTPUTS) { + if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) { R600_ERR("Too many stream outputs: %d\n", so->num_outputs); r = -EINVAL; goto out_err; From 3d16b5af1dca889ccc3716470f38c1fa84713f26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 May 2015 19:07:35 +0200 Subject: [PATCH 431/834] tgsi/ureg: fix a coverity defect in emit_decls Reported by Ilia Mirkin. --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 1cea0919ce4..0eaf1dfa7ae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -111,7 +111,7 @@ struct ureg_program } input[UREG_MAX_INPUT]; unsigned nr_inputs, nr_input_regs; - unsigned vs_inputs[UREG_MAX_INPUT/32]; + unsigned vs_inputs[PIPE_MAX_ATTRIBS/32]; struct { unsigned index; @@ -298,7 +298,8 @@ ureg_DECL_vs_input( struct ureg_program *ureg, unsigned index ) { assert(ureg->processor == TGSI_PROCESSOR_VERTEX); - + assert(index / 32 < ARRAY_SIZE(ureg->vs_inputs)); + ureg->vs_inputs[index/32] |= 1 << (index % 32); return ureg_src_register( TGSI_FILE_INPUT, index ); } @@ -1513,7 +1514,7 @@ static void emit_decls( struct ureg_program *ureg ) emit_property(ureg, i, ureg->properties[i]); if (ureg->processor == TGSI_PROCESSOR_VERTEX) { - for (i = 0; i < UREG_MAX_INPUT; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (ureg->vs_inputs[i/32] & (1 << (i%32))) { emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 ); } From e8b040477e271324a88d35c003775337848a500c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 28 May 2015 16:09:23 +0200 Subject: [PATCH 432/834] mesa: remove unused geometry shader variables These states are for GS assembly shaders only. We don't support those. 
Reviewed-by: Dave Airlie Reviewed-by: Brian Paul --- src/mesa/main/context.c | 1 - src/mesa/main/mtypes.h | 7 ------- src/mesa/main/shared.c | 1 - src/mesa/program/program.c | 9 --------- 4 files changed, 18 deletions(-) diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 8a59b5ed42f..db494ca73cc 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1331,7 +1331,6 @@ _mesa_free_context_data( struct gl_context *ctx ) _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL); _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, NULL); - _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL); _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL); _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 0aa607653d1..1872845df8d 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2275,16 +2275,10 @@ struct gl_vertex_program_state */ struct gl_geometry_program_state { - GLboolean Enabled; /**< GL_ARB_GEOMETRY_SHADER4 */ - GLboolean _Enabled; /**< Enabled and valid program? */ - struct gl_geometry_program *Current; /**< user-bound geometry program */ - /** Currently enabled and valid program (including internal programs * and compiled shader programs). 
*/ struct gl_geometry_program *_Current; - - GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ }; /** @@ -3004,7 +2998,6 @@ struct gl_shared_state struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */ struct gl_vertex_program *DefaultVertexProgram; struct gl_fragment_program *DefaultFragmentProgram; - struct gl_geometry_program *DefaultGeometryProgram; /*@}*/ /* GL_ATI_fragment_shader */ diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index 0b76cc01218..d5ac9f1fb13 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -313,7 +313,6 @@ free_shared_state(struct gl_context *ctx, struct gl_shared_state *shared) _mesa_DeleteHashTable(shared->Programs); _mesa_reference_vertprog(ctx, &shared->DefaultVertexProgram, NULL); - _mesa_reference_geomprog(ctx, &shared->DefaultGeometryProgram, NULL); _mesa_reference_fragprog(ctx, &shared->DefaultFragmentProgram, NULL); _mesa_HashDeleteAll(shared->ATIShaders, delete_fragshader_cb, ctx); diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index fb61f4d360d..f0a47ac9201 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -97,11 +97,6 @@ _mesa_init_program(struct gl_context *ctx) assert(ctx->FragmentProgram.Current); ctx->FragmentProgram.Cache = _mesa_new_program_cache(); - ctx->GeometryProgram.Enabled = GL_FALSE; - /* right now by default we don't have a geometry program */ - _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, - NULL); - _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL); /* XXX probably move this stuff */ @@ -122,7 +117,6 @@ _mesa_free_program_data(struct gl_context *ctx) _mesa_delete_program_cache(ctx, ctx->VertexProgram.Cache); _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL); _mesa_delete_shader_cache(ctx, ctx->FragmentProgram.Cache); - _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL); _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL); /* 
XXX probably move this stuff */ @@ -153,9 +147,6 @@ _mesa_update_default_objects_program(struct gl_context *ctx) ctx->Shared->DefaultFragmentProgram); assert(ctx->FragmentProgram.Current); - _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, - ctx->Shared->DefaultGeometryProgram); - /* XXX probably move this stuff */ if (ctx->ATIFragmentShader.Current) { ctx->ATIFragmentShader.Current->RefCount--; From b7ef7903b8f582438172ef1bdc72788be3aa0860 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 28 May 2015 16:28:39 +0200 Subject: [PATCH 433/834] mesa: remove useless gl_compute_program_state::Current This is for user assembly shaders only (not GLSL). We won't support those. Reviewed-by: Dave Airlie Reviewed-by: Brian Paul --- src/mesa/main/mtypes.h | 2 -- src/mesa/program/program.c | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 1872845df8d..5006aeff851 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2314,8 +2314,6 @@ struct gl_fragment_program_state */ struct gl_compute_program_state { - struct gl_compute_program *Current; /**< user-bound compute program */ - /** Currently enabled and valid program (including internal programs * and compiled shader programs). 
*/ diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index f0a47ac9201..1167adf5ddd 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -97,8 +97,6 @@ _mesa_init_program(struct gl_context *ctx) assert(ctx->FragmentProgram.Current); ctx->FragmentProgram.Cache = _mesa_new_program_cache(); - _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL); - /* XXX probably move this stuff */ ctx->ATIFragmentShader.Enabled = GL_FALSE; ctx->ATIFragmentShader.Current = ctx->Shared->DefaultFragmentShader; @@ -117,7 +115,6 @@ _mesa_free_program_data(struct gl_context *ctx) _mesa_delete_program_cache(ctx, ctx->VertexProgram.Cache); _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL); _mesa_delete_shader_cache(ctx, ctx->FragmentProgram.Cache); - _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL); /* XXX probably move this stuff */ if (ctx->ATIFragmentShader.Current) { From 3b2721ce1145cc60bf35e41e9f50a92849142a06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 28 May 2015 18:10:08 +0200 Subject: [PATCH 434/834] mesa: use _mesa_has_geometry_shader in get_programiv Reviewed-by: Dave Airlie Reviewed-by: Brian Paul --- src/mesa/main/shaderapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 6d8e6e23e9c..a4296adf799 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -532,7 +532,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, /* True if geometry shaders (of the form that was adopted into GLSL 1.50 * and GL 3.2) are available in this context */ - const bool has_core_gs = _mesa_is_desktop_gl(ctx) && ctx->Version >= 32; + const bool has_core_gs = _mesa_has_geometry_shaders(ctx); /* Are uniform buffer objects available in this context? 
*/ From 4312b4f5704ddd88e27b3bf2c17eaf054567f067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 28 May 2015 18:14:29 +0200 Subject: [PATCH 435/834] mesa: use GL_GEOMETRY_PROGRAM_NV instead of MESA_GEOMETRY_PROGRAM There's no reason to use our own definition. Tessellation will use the NV definitions too. Reviewed-by: Dave Airlie Reviewed-by: Brian Paul --- src/mesa/drivers/dri/i965/brw_program.c | 2 +- src/mesa/main/glheader.h | 6 ------ src/mesa/main/state.c | 2 +- src/mesa/program/prog_print.c | 2 +- src/mesa/program/program.c | 12 ++++++------ src/mesa/state_tracker/st_atom_shader.c | 2 +- src/mesa/state_tracker/st_cb_program.c | 8 ++++---- src/mesa/state_tracker/st_program.c | 2 +- 8 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index e5c0d3c7604..414eab9c002 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -88,7 +88,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx, return NULL; } - case MESA_GEOMETRY_PROGRAM: { + case GL_GEOMETRY_PROGRAM_NV: { struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program); if (prog) { prog->id = get_new_program_id(brw->intelScreen); diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h index 7f7f9a39b3b..a2d98d4ddff 100644 --- a/src/mesa/main/glheader.h +++ b/src/mesa/main/glheader.h @@ -135,12 +135,6 @@ typedef void *GLeglImageOES; #define GL_SHADER_PROGRAM_MESA 0x9999 -/** - * Internal token for geometry programs. - * Use the value for GL_GEOMETRY_PROGRAM_NV for now. - */ -#define MESA_GEOMETRY_PROGRAM 0x8c26 - /* Several fields of struct gl_config can take these as values. Since * GLX header files may not be available everywhere they need to be used, * redefine them here. 
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 2657c532f88..5b970081a3f 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -225,7 +225,7 @@ update_program(struct gl_context *ctx) if (ctx->GeometryProgram._Current != prevGP) { new_state |= _NEW_PROGRAM; if (ctx->Driver.BindProgram) { - ctx->Driver.BindProgram(ctx, MESA_GEOMETRY_PROGRAM, + ctx->Driver.BindProgram(ctx, GL_GEOMETRY_PROGRAM_NV, (struct gl_program *) ctx->GeometryProgram._Current); } } diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index e360f09b72c..e4faa63c06f 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -864,7 +864,7 @@ _mesa_fprint_program_opt(FILE *f, else fprintf(f, "# Fragment Program/Shader %u\n", prog->Id); break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: fprintf(f, "# Geometry Shader\n"); } diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 1167adf5ddd..c13e61b1630 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -328,7 +328,7 @@ _mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id) CALLOC_STRUCT(gl_fragment_program), target, id ); break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: prog = _mesa_init_geometry_program(ctx, CALLOC_STRUCT(gl_geometry_program), target, id); @@ -414,8 +414,8 @@ _mesa_reference_program_(struct gl_context *ctx, else if ((*ptr)->Target == GL_FRAGMENT_PROGRAM_ARB) assert(prog->Target == GL_FRAGMENT_PROGRAM_ARB || prog->Target == GL_FRAGMENT_PROGRAM_NV); - else if ((*ptr)->Target == MESA_GEOMETRY_PROGRAM) - assert(prog->Target == MESA_GEOMETRY_PROGRAM); + else if ((*ptr)->Target == GL_GEOMETRY_PROGRAM_NV) + assert(prog->Target == GL_GEOMETRY_PROGRAM_NV); } #endif @@ -427,7 +427,7 @@ _mesa_reference_program_(struct gl_context *ctx, printf("Program %p ID=%u Target=%s Refcount-- to %d\n", *ptr, (*ptr)->Id, ((*ptr)->Target == GL_VERTEX_PROGRAM_ARB ? 
"VP" : - ((*ptr)->Target == MESA_GEOMETRY_PROGRAM ? "GP" : "FP")), + ((*ptr)->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")), (*ptr)->RefCount - 1); #endif assert((*ptr)->RefCount > 0); @@ -452,7 +452,7 @@ _mesa_reference_program_(struct gl_context *ctx, printf("Program %p ID=%u Target=%s Refcount++ to %d\n", prog, prog->Id, (prog->Target == GL_VERTEX_PROGRAM_ARB ? "VP" : - (prog->Target == MESA_GEOMETRY_PROGRAM ? "GP" : "FP")), + (prog->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")), prog->RefCount); #endif /*mtx_unlock(&prog->Mutex);*/ @@ -542,7 +542,7 @@ _mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog) fpc->PixelCenterInteger = fp->PixelCenterInteger; } break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: { const struct gl_geometry_program *gp = gl_geometry_program_const(prog); struct gl_geometry_program *gpc = gl_geometry_program(clone); diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 629f54f25de..ad8d2624fc9 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -189,7 +189,7 @@ update_gp( struct st_context *st ) } stgp = st_geometry_program(st->ctx->GeometryProgram._Current); - assert(stgp->Base.Base.Target == MESA_GEOMETRY_PROGRAM); + assert(stgp->Base.Base.Target == GL_GEOMETRY_PROGRAM_NV); memset(&key, 0, sizeof(key)); key.st = st; diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index c382d7d2ca3..6aa7d5796d9 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -65,7 +65,7 @@ st_bind_program(struct gl_context *ctx, GLenum target, struct gl_program *prog) case GL_FRAGMENT_PROGRAM_ARB: st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM; break; } @@ -105,7 +105,7 @@ st_new_program(struct gl_context *ctx, GLenum target, GLuint id) 
return _mesa_init_fragment_program(ctx, &prog->Base, target, id); } - case MESA_GEOMETRY_PROGRAM: { + case GL_GEOMETRY_PROGRAM_NV: { struct st_geometry_program *prog = ST_CALLOC_STRUCT(st_geometry_program); return _mesa_init_geometry_program(ctx, &prog->Base, target, id); } @@ -135,7 +135,7 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi); } break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: { struct st_geometry_program *stgp = (struct st_geometry_program *) prog; @@ -198,7 +198,7 @@ st_program_string_notify( struct gl_context *ctx, if (st->fp == stfp) st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; } - else if (target == MESA_GEOMETRY_PROGRAM) { + else if (target == GL_GEOMETRY_PROGRAM_NV) { struct st_geometry_program *stgp = (struct st_geometry_program *) prog; st_release_gp_variants(st, stgp); diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 35faa7bb4d0..d5a124f2188 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -1217,7 +1217,7 @@ destroy_program_variants(struct st_context *st, struct gl_program *program) } } break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: { struct st_geometry_program *stgp = (struct st_geometry_program *) program; From f52e8572ae1d91bcb6aef9fd3aac02ede62dee4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 28 May 2015 16:13:37 +0200 Subject: [PATCH 436/834] mesa: remove unused gl_config::colorIndexMode Reviewed-by: Dave Airlie Reviewed-by: Brian Paul --- src/mesa/main/mtypes.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 5006aeff851..1598e2cdbd0 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -398,7 +398,6 @@ struct gl_config { GLboolean rgbMode; GLboolean floatMode; - GLboolean colorIndexMode; /* XXX is this used anywhere? 
*/ GLuint doubleBufferMode; GLuint stereoMode; From 49ae822183aa4daf6c6df9ef33e0b9a148d1a0d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 15 May 2015 19:55:24 +0200 Subject: [PATCH 437/834] egl: import egl.h from registry (v2) v2: split the commit into 3 patches Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- include/EGL/egl.h | 560 ++++++++++++++++++++++------------------------ 1 file changed, 267 insertions(+), 293 deletions(-) diff --git a/include/EGL/egl.h b/include/EGL/egl.h index 99ea342a477..0d514e4defb 100644 --- a/include/EGL/egl.h +++ b/include/EGL/egl.h @@ -1,11 +1,12 @@ -/* -*- mode: c; tab-width: 8; -*- */ -/* vi: set sw=4 ts=8: */ -/* Reference version of egl.h for EGL 1.4. - * $Revision: 9356 $ on $Date: 2009-10-21 02:52:25 -0700 (Wed, 21 Oct 2009) $ - */ +#ifndef __egl_h_ +#define __egl_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif /* -** Copyright (c) 2007-2009 The Khronos Group Inc. +** Copyright (c) 2013-2014 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -26,304 +27,277 @@ ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. */ +/* +** This header is generated from the Khronos OpenGL / OpenGL ES XML +** API Registry. The current version of the Registry, generator scripts +** used to make the header, and the header can be found at +** http://www.opengl.org/registry/ +** +** Khronos $Revision: 31039 $ on $Date: 2015-05-04 17:01:57 -0700 (Mon, 04 May 2015) $ +*/ -#ifndef __egl_h_ -#define __egl_h_ - -/* All platform-dependent types and macro boilerplate (such as EGLAPI - * and EGLAPIENTRY) should go in eglplatform.h. 
- */ #include -#ifdef __cplusplus -extern "C" { -#endif +/* Generated on date 20150504 */ -/* EGL Types */ -/* EGLint is defined in eglplatform.h */ +/* Generated C header for: + * API: egl + * Versions considered: .* + * Versions emitted: .* + * Default extensions included: None + * Additional extensions included: _nomatch_^ + * Extensions removed: _nomatch_^ + */ + +#ifndef EGL_VERSION_1_0 +#define EGL_VERSION_1_0 1 typedef unsigned int EGLBoolean; -typedef unsigned int EGLenum; -typedef void *EGLConfig; -typedef void *EGLContext; typedef void *EGLDisplay; +#include +#include +typedef void *EGLConfig; typedef void *EGLSurface; -typedef void *EGLClientBuffer; - -/* EGL Versioning */ -#define EGL_VERSION_1_0 1 -#define EGL_VERSION_1_1 1 -#define EGL_VERSION_1_2 1 -#define EGL_VERSION_1_3 1 -#define EGL_VERSION_1_4 1 - -/* EGL Enumerants. Bitmasks and other exceptional cases aside, most - * enums are assigned unique values starting at 0x3000. - */ - -/* EGL aliases */ -#define EGL_FALSE 0 -#define EGL_TRUE 1 - -/* Out-of-band handle values */ -#define EGL_DEFAULT_DISPLAY ((EGLNativeDisplayType)0) -#define EGL_NO_CONTEXT ((EGLContext)0) -#define EGL_NO_DISPLAY ((EGLDisplay)0) -#define EGL_NO_SURFACE ((EGLSurface)0) - -/* Out-of-band attribute value */ -#define EGL_DONT_CARE ((EGLint)-1) - -/* Errors / GetError return values */ -#define EGL_SUCCESS 0x3000 -#define EGL_NOT_INITIALIZED 0x3001 -#define EGL_BAD_ACCESS 0x3002 -#define EGL_BAD_ALLOC 0x3003 -#define EGL_BAD_ATTRIBUTE 0x3004 -#define EGL_BAD_CONFIG 0x3005 -#define EGL_BAD_CONTEXT 0x3006 -#define EGL_BAD_CURRENT_SURFACE 0x3007 -#define EGL_BAD_DISPLAY 0x3008 -#define EGL_BAD_MATCH 0x3009 -#define EGL_BAD_NATIVE_PIXMAP 0x300A -#define EGL_BAD_NATIVE_WINDOW 0x300B -#define EGL_BAD_PARAMETER 0x300C -#define EGL_BAD_SURFACE 0x300D -#define EGL_CONTEXT_LOST 0x300E /* EGL 1.1 - IMG_power_management */ - -/* Reserved 0x300F-0x301F for additional errors */ - -/* Config attributes */ -#define EGL_BUFFER_SIZE 0x3020 
-#define EGL_ALPHA_SIZE 0x3021 -#define EGL_BLUE_SIZE 0x3022 -#define EGL_GREEN_SIZE 0x3023 -#define EGL_RED_SIZE 0x3024 -#define EGL_DEPTH_SIZE 0x3025 -#define EGL_STENCIL_SIZE 0x3026 -#define EGL_CONFIG_CAVEAT 0x3027 -#define EGL_CONFIG_ID 0x3028 -#define EGL_LEVEL 0x3029 -#define EGL_MAX_PBUFFER_HEIGHT 0x302A -#define EGL_MAX_PBUFFER_PIXELS 0x302B -#define EGL_MAX_PBUFFER_WIDTH 0x302C -#define EGL_NATIVE_RENDERABLE 0x302D -#define EGL_NATIVE_VISUAL_ID 0x302E -#define EGL_NATIVE_VISUAL_TYPE 0x302F -#define EGL_SAMPLES 0x3031 -#define EGL_SAMPLE_BUFFERS 0x3032 -#define EGL_SURFACE_TYPE 0x3033 -#define EGL_TRANSPARENT_TYPE 0x3034 -#define EGL_TRANSPARENT_BLUE_VALUE 0x3035 -#define EGL_TRANSPARENT_GREEN_VALUE 0x3036 -#define EGL_TRANSPARENT_RED_VALUE 0x3037 -#define EGL_NONE 0x3038 /* Attrib list terminator */ -#define EGL_BIND_TO_TEXTURE_RGB 0x3039 -#define EGL_BIND_TO_TEXTURE_RGBA 0x303A -#define EGL_MIN_SWAP_INTERVAL 0x303B -#define EGL_MAX_SWAP_INTERVAL 0x303C -#define EGL_LUMINANCE_SIZE 0x303D -#define EGL_ALPHA_MASK_SIZE 0x303E -#define EGL_COLOR_BUFFER_TYPE 0x303F -#define EGL_RENDERABLE_TYPE 0x3040 -#define EGL_MATCH_NATIVE_PIXMAP 0x3041 /* Pseudo-attribute (not queryable) */ -#define EGL_CONFORMANT 0x3042 - -/* Reserved 0x3041-0x304F for additional config attributes */ - -/* Config attribute values */ -#define EGL_SLOW_CONFIG 0x3050 /* EGL_CONFIG_CAVEAT value */ -#define EGL_NON_CONFORMANT_CONFIG 0x3051 /* EGL_CONFIG_CAVEAT value */ -#define EGL_TRANSPARENT_RGB 0x3052 /* EGL_TRANSPARENT_TYPE value */ -#define EGL_RGB_BUFFER 0x308E /* EGL_COLOR_BUFFER_TYPE value */ -#define EGL_LUMINANCE_BUFFER 0x308F /* EGL_COLOR_BUFFER_TYPE value */ - -/* More config attribute values, for EGL_TEXTURE_FORMAT */ -#define EGL_NO_TEXTURE 0x305C -#define EGL_TEXTURE_RGB 0x305D -#define EGL_TEXTURE_RGBA 0x305E -#define EGL_TEXTURE_2D 0x305F - -/* Config attribute mask bits */ -#define EGL_PBUFFER_BIT 0x0001 /* EGL_SURFACE_TYPE mask bits */ -#define EGL_PIXMAP_BIT 0x0002 /* 
EGL_SURFACE_TYPE mask bits */ -#define EGL_WINDOW_BIT 0x0004 /* EGL_SURFACE_TYPE mask bits */ -#define EGL_VG_COLORSPACE_LINEAR_BIT 0x0020 /* EGL_SURFACE_TYPE mask bits */ -#define EGL_VG_ALPHA_FORMAT_PRE_BIT 0x0040 /* EGL_SURFACE_TYPE mask bits */ -#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200 /* EGL_SURFACE_TYPE mask bits */ -#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400 /* EGL_SURFACE_TYPE mask bits */ - -#define EGL_OPENGL_ES_BIT 0x0001 /* EGL_RENDERABLE_TYPE mask bits */ -#define EGL_OPENVG_BIT 0x0002 /* EGL_RENDERABLE_TYPE mask bits */ -#define EGL_OPENGL_ES2_BIT 0x0004 /* EGL_RENDERABLE_TYPE mask bits */ -#define EGL_OPENGL_BIT 0x0008 /* EGL_RENDERABLE_TYPE mask bits */ - -/* QueryString targets */ -#define EGL_VENDOR 0x3053 -#define EGL_VERSION 0x3054 -#define EGL_EXTENSIONS 0x3055 -#define EGL_CLIENT_APIS 0x308D - -/* QuerySurface / SurfaceAttrib / CreatePbufferSurface targets */ -#define EGL_HEIGHT 0x3056 -#define EGL_WIDTH 0x3057 -#define EGL_LARGEST_PBUFFER 0x3058 -#define EGL_TEXTURE_FORMAT 0x3080 -#define EGL_TEXTURE_TARGET 0x3081 -#define EGL_MIPMAP_TEXTURE 0x3082 -#define EGL_MIPMAP_LEVEL 0x3083 -#define EGL_RENDER_BUFFER 0x3086 -#define EGL_VG_COLORSPACE 0x3087 -#define EGL_VG_ALPHA_FORMAT 0x3088 -#define EGL_HORIZONTAL_RESOLUTION 0x3090 -#define EGL_VERTICAL_RESOLUTION 0x3091 -#define EGL_PIXEL_ASPECT_RATIO 0x3092 -#define EGL_SWAP_BEHAVIOR 0x3093 -#define EGL_MULTISAMPLE_RESOLVE 0x3099 - -/* EGL_RENDER_BUFFER values / BindTexImage / ReleaseTexImage buffer targets */ -#define EGL_BACK_BUFFER 0x3084 -#define EGL_SINGLE_BUFFER 0x3085 - -/* OpenVG color spaces */ -#define EGL_VG_COLORSPACE_sRGB 0x3089 /* EGL_VG_COLORSPACE value */ -#define EGL_VG_COLORSPACE_LINEAR 0x308A /* EGL_VG_COLORSPACE value */ - -/* OpenVG alpha formats */ -#define EGL_VG_ALPHA_FORMAT_NONPRE 0x308B /* EGL_ALPHA_FORMAT value */ -#define EGL_VG_ALPHA_FORMAT_PRE 0x308C /* EGL_ALPHA_FORMAT value */ - -/* Constant scale factor by which fractional display resolutions & - * aspect 
ratio are scaled when queried as integer values. - */ -#define EGL_DISPLAY_SCALING 10000 - -/* Unknown display resolution/aspect ratio */ -#define EGL_UNKNOWN ((EGLint)-1) - -/* Back buffer swap behaviors */ -#define EGL_BUFFER_PRESERVED 0x3094 /* EGL_SWAP_BEHAVIOR value */ -#define EGL_BUFFER_DESTROYED 0x3095 /* EGL_SWAP_BEHAVIOR value */ - -/* CreatePbufferFromClientBuffer buffer types */ -#define EGL_OPENVG_IMAGE 0x3096 - -/* QueryContext targets */ -#define EGL_CONTEXT_CLIENT_TYPE 0x3097 - -/* CreateContext attributes */ -#define EGL_CONTEXT_CLIENT_VERSION 0x3098 - -/* Multisample resolution behaviors */ -#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A /* EGL_MULTISAMPLE_RESOLVE value */ -#define EGL_MULTISAMPLE_RESOLVE_BOX 0x309B /* EGL_MULTISAMPLE_RESOLVE value */ - -/* BindAPI/QueryAPI targets */ -#define EGL_OPENGL_ES_API 0x30A0 -#define EGL_OPENVG_API 0x30A1 -#define EGL_OPENGL_API 0x30A2 - -/* GetCurrentSurface targets */ -#define EGL_DRAW 0x3059 -#define EGL_READ 0x305A - -/* WaitNative engines */ -#define EGL_CORE_NATIVE_ENGINE 0x305B - -/* EGL 1.2 tokens renamed for consistency in EGL 1.3 */ -#define EGL_COLORSPACE EGL_VG_COLORSPACE -#define EGL_ALPHA_FORMAT EGL_VG_ALPHA_FORMAT -#define EGL_COLORSPACE_sRGB EGL_VG_COLORSPACE_sRGB -#define EGL_COLORSPACE_LINEAR EGL_VG_COLORSPACE_LINEAR -#define EGL_ALPHA_FORMAT_NONPRE EGL_VG_ALPHA_FORMAT_NONPRE -#define EGL_ALPHA_FORMAT_PRE EGL_VG_ALPHA_FORMAT_PRE - -/* EGL extensions must request enum blocks from the Khronos - * API Registrar, who maintains the enumerant registry. Submit - * a bug in Khronos Bugzilla against task "Registry". 
- */ - - - -/* EGL Functions */ - -EGLAPI EGLint EGLAPIENTRY eglGetError(void); - -EGLAPI EGLDisplay EGLAPIENTRY eglGetDisplay(EGLNativeDisplayType display_id); -EGLAPI EGLBoolean EGLAPIENTRY eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor); -EGLAPI EGLBoolean EGLAPIENTRY eglTerminate(EGLDisplay dpy); - -EGLAPI const char * EGLAPIENTRY eglQueryString(EGLDisplay dpy, EGLint name); - -EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigs(EGLDisplay dpy, EGLConfig *configs, - EGLint config_size, EGLint *num_config); -EGLAPI EGLBoolean EGLAPIENTRY eglChooseConfig(EGLDisplay dpy, const EGLint *attrib_list, - EGLConfig *configs, EGLint config_size, - EGLint *num_config); -EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigAttrib(EGLDisplay dpy, EGLConfig config, - EGLint attribute, EGLint *value); - -EGLAPI EGLSurface EGLAPIENTRY eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config, - EGLNativeWindowType win, - const EGLint *attrib_list); -EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config, - const EGLint *attrib_list); -EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config, - EGLNativePixmapType pixmap, - const EGLint *attrib_list); -EGLAPI EGLBoolean EGLAPIENTRY eglDestroySurface(EGLDisplay dpy, EGLSurface surface); -EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface(EGLDisplay dpy, EGLSurface surface, - EGLint attribute, EGLint *value); - -EGLAPI EGLBoolean EGLAPIENTRY eglBindAPI(EGLenum api); -EGLAPI EGLenum EGLAPIENTRY eglQueryAPI(void); - -EGLAPI EGLBoolean EGLAPIENTRY eglWaitClient(void); - -EGLAPI EGLBoolean EGLAPIENTRY eglReleaseThread(void); - -EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferFromClientBuffer( - EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, - EGLConfig config, const EGLint *attrib_list); - -EGLAPI EGLBoolean EGLAPIENTRY eglSurfaceAttrib(EGLDisplay dpy, EGLSurface surface, - EGLint attribute, EGLint value); -EGLAPI EGLBoolean EGLAPIENTRY eglBindTexImage(EGLDisplay dpy, EGLSurface 
surface, EGLint buffer); -EGLAPI EGLBoolean EGLAPIENTRY eglReleaseTexImage(EGLDisplay dpy, EGLSurface surface, EGLint buffer); - - -EGLAPI EGLBoolean EGLAPIENTRY eglSwapInterval(EGLDisplay dpy, EGLint interval); - - -EGLAPI EGLContext EGLAPIENTRY eglCreateContext(EGLDisplay dpy, EGLConfig config, - EGLContext share_context, - const EGLint *attrib_list); -EGLAPI EGLBoolean EGLAPIENTRY eglDestroyContext(EGLDisplay dpy, EGLContext ctx); -EGLAPI EGLBoolean EGLAPIENTRY eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, - EGLSurface read, EGLContext ctx); - -EGLAPI EGLContext EGLAPIENTRY eglGetCurrentContext(void); -EGLAPI EGLSurface EGLAPIENTRY eglGetCurrentSurface(EGLint readdraw); -EGLAPI EGLDisplay EGLAPIENTRY eglGetCurrentDisplay(void); -EGLAPI EGLBoolean EGLAPIENTRY eglQueryContext(EGLDisplay dpy, EGLContext ctx, - EGLint attribute, EGLint *value); - -EGLAPI EGLBoolean EGLAPIENTRY eglWaitGL(void); -EGLAPI EGLBoolean EGLAPIENTRY eglWaitNative(EGLint engine); -EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffers(EGLDisplay dpy, EGLSurface surface); -EGLAPI EGLBoolean EGLAPIENTRY eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, - EGLNativePixmapType target); - -/* This is a generic function pointer type, whose name indicates it must - * be cast to the proper type *and calling convention* before use. 
- */ +typedef void *EGLContext; typedef void (*__eglMustCastToProperFunctionPointerType)(void); +#define EGL_ALPHA_SIZE 0x3021 +#define EGL_BAD_ACCESS 0x3002 +#define EGL_BAD_ALLOC 0x3003 +#define EGL_BAD_ATTRIBUTE 0x3004 +#define EGL_BAD_CONFIG 0x3005 +#define EGL_BAD_CONTEXT 0x3006 +#define EGL_BAD_CURRENT_SURFACE 0x3007 +#define EGL_BAD_DISPLAY 0x3008 +#define EGL_BAD_MATCH 0x3009 +#define EGL_BAD_NATIVE_PIXMAP 0x300A +#define EGL_BAD_NATIVE_WINDOW 0x300B +#define EGL_BAD_PARAMETER 0x300C +#define EGL_BAD_SURFACE 0x300D +#define EGL_BLUE_SIZE 0x3022 +#define EGL_BUFFER_SIZE 0x3020 +#define EGL_CONFIG_CAVEAT 0x3027 +#define EGL_CONFIG_ID 0x3028 +#define EGL_CORE_NATIVE_ENGINE 0x305B +#define EGL_DEPTH_SIZE 0x3025 +#define EGL_DONT_CARE ((EGLint)-1) +#define EGL_DRAW 0x3059 +#define EGL_EXTENSIONS 0x3055 +#define EGL_FALSE 0 +#define EGL_GREEN_SIZE 0x3023 +#define EGL_HEIGHT 0x3056 +#define EGL_LARGEST_PBUFFER 0x3058 +#define EGL_LEVEL 0x3029 +#define EGL_MAX_PBUFFER_HEIGHT 0x302A +#define EGL_MAX_PBUFFER_PIXELS 0x302B +#define EGL_MAX_PBUFFER_WIDTH 0x302C +#define EGL_NATIVE_RENDERABLE 0x302D +#define EGL_NATIVE_VISUAL_ID 0x302E +#define EGL_NATIVE_VISUAL_TYPE 0x302F +#define EGL_NONE 0x3038 +#define EGL_NON_CONFORMANT_CONFIG 0x3051 +#define EGL_NOT_INITIALIZED 0x3001 +#define EGL_NO_CONTEXT ((EGLContext)0) +#define EGL_NO_DISPLAY ((EGLDisplay)0) +#define EGL_NO_SURFACE ((EGLSurface)0) +#define EGL_PBUFFER_BIT 0x0001 +#define EGL_PIXMAP_BIT 0x0002 +#define EGL_READ 0x305A +#define EGL_RED_SIZE 0x3024 +#define EGL_SAMPLES 0x3031 +#define EGL_SAMPLE_BUFFERS 0x3032 +#define EGL_SLOW_CONFIG 0x3050 +#define EGL_STENCIL_SIZE 0x3026 +#define EGL_SUCCESS 0x3000 +#define EGL_SURFACE_TYPE 0x3033 +#define EGL_TRANSPARENT_BLUE_VALUE 0x3035 +#define EGL_TRANSPARENT_GREEN_VALUE 0x3036 +#define EGL_TRANSPARENT_RED_VALUE 0x3037 +#define EGL_TRANSPARENT_RGB 0x3052 +#define EGL_TRANSPARENT_TYPE 0x3034 +#define EGL_TRUE 1 +#define EGL_VENDOR 0x3053 +#define EGL_VERSION 0x3054 
+#define EGL_WIDTH 0x3057 +#define EGL_WINDOW_BIT 0x0004 +EGLAPI EGLBoolean EGLAPIENTRY eglChooseConfig (EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config); +EGLAPI EGLBoolean EGLAPIENTRY eglCopyBuffers (EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target); +EGLAPI EGLContext EGLAPIENTRY eglCreateContext (EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferSurface (EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurface (EGLDisplay dpy, EGLConfig config, EGLNativePixmapType pixmap, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreateWindowSurface (EGLDisplay dpy, EGLConfig config, EGLNativeWindowType win, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyContext (EGLDisplay dpy, EGLContext ctx); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySurface (EGLDisplay dpy, EGLSurface surface); +EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigAttrib (EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint *value); +EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigs (EGLDisplay dpy, EGLConfig *configs, EGLint config_size, EGLint *num_config); +EGLAPI EGLDisplay EGLAPIENTRY eglGetCurrentDisplay (void); +EGLAPI EGLSurface EGLAPIENTRY eglGetCurrentSurface (EGLint readdraw); +EGLAPI EGLDisplay EGLAPIENTRY eglGetDisplay (EGLNativeDisplayType display_id); +EGLAPI EGLint EGLAPIENTRY eglGetError (void); +EGLAPI __eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress (const char *procname); +EGLAPI EGLBoolean EGLAPIENTRY eglInitialize (EGLDisplay dpy, EGLint *major, EGLint *minor); +EGLAPI EGLBoolean EGLAPIENTRY eglMakeCurrent (EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryContext (EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint *value); +EGLAPI const char *EGLAPIENTRY 
eglQueryString (EGLDisplay dpy, EGLint name); +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint *value); +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffers (EGLDisplay dpy, EGLSurface surface); +EGLAPI EGLBoolean EGLAPIENTRY eglTerminate (EGLDisplay dpy); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitGL (void); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitNative (EGLint engine); +#endif /* EGL_VERSION_1_0 */ -/* Now, define eglGetProcAddress using the generic function ptr. type */ -EGLAPI __eglMustCastToProperFunctionPointerType EGLAPIENTRY - eglGetProcAddress(const char *procname); +#ifndef EGL_VERSION_1_1 +#define EGL_VERSION_1_1 1 +#define EGL_BACK_BUFFER 0x3084 +#define EGL_BIND_TO_TEXTURE_RGB 0x3039 +#define EGL_BIND_TO_TEXTURE_RGBA 0x303A +#define EGL_CONTEXT_LOST 0x300E +#define EGL_MIN_SWAP_INTERVAL 0x303B +#define EGL_MAX_SWAP_INTERVAL 0x303C +#define EGL_MIPMAP_TEXTURE 0x3082 +#define EGL_MIPMAP_LEVEL 0x3083 +#define EGL_NO_TEXTURE 0x305C +#define EGL_TEXTURE_2D 0x305F +#define EGL_TEXTURE_FORMAT 0x3080 +#define EGL_TEXTURE_RGB 0x305D +#define EGL_TEXTURE_RGBA 0x305E +#define EGL_TEXTURE_TARGET 0x3081 +EGLAPI EGLBoolean EGLAPIENTRY eglBindTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer); +EGLAPI EGLBoolean EGLAPIENTRY eglReleaseTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer); +EGLAPI EGLBoolean EGLAPIENTRY eglSurfaceAttrib (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value); +EGLAPI EGLBoolean EGLAPIENTRY eglSwapInterval (EGLDisplay dpy, EGLint interval); +#endif /* EGL_VERSION_1_1 */ + +#ifndef EGL_VERSION_1_2 +#define EGL_VERSION_1_2 1 +typedef unsigned int EGLenum; +typedef void *EGLClientBuffer; +#define EGL_ALPHA_FORMAT 0x3088 +#define EGL_ALPHA_FORMAT_NONPRE 0x308B +#define EGL_ALPHA_FORMAT_PRE 0x308C +#define EGL_ALPHA_MASK_SIZE 0x303E +#define EGL_BUFFER_PRESERVED 0x3094 +#define EGL_BUFFER_DESTROYED 0x3095 +#define EGL_CLIENT_APIS 0x308D +#define EGL_COLORSPACE 
0x3087 +#define EGL_COLORSPACE_sRGB 0x3089 +#define EGL_COLORSPACE_LINEAR 0x308A +#define EGL_COLOR_BUFFER_TYPE 0x303F +#define EGL_CONTEXT_CLIENT_TYPE 0x3097 +#define EGL_DISPLAY_SCALING 10000 +#define EGL_HORIZONTAL_RESOLUTION 0x3090 +#define EGL_LUMINANCE_BUFFER 0x308F +#define EGL_LUMINANCE_SIZE 0x303D +#define EGL_OPENGL_ES_BIT 0x0001 +#define EGL_OPENVG_BIT 0x0002 +#define EGL_OPENGL_ES_API 0x30A0 +#define EGL_OPENVG_API 0x30A1 +#define EGL_OPENVG_IMAGE 0x3096 +#define EGL_PIXEL_ASPECT_RATIO 0x3092 +#define EGL_RENDERABLE_TYPE 0x3040 +#define EGL_RENDER_BUFFER 0x3086 +#define EGL_RGB_BUFFER 0x308E +#define EGL_SINGLE_BUFFER 0x3085 +#define EGL_SWAP_BEHAVIOR 0x3093 +#define EGL_UNKNOWN ((EGLint)-1) +#define EGL_VERTICAL_RESOLUTION 0x3091 +EGLAPI EGLBoolean EGLAPIENTRY eglBindAPI (EGLenum api); +EGLAPI EGLenum EGLAPIENTRY eglQueryAPI (void); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferFromClientBuffer (EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglReleaseThread (void); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitClient (void); +#endif /* EGL_VERSION_1_2 */ + +#ifndef EGL_VERSION_1_3 +#define EGL_VERSION_1_3 1 +#define EGL_CONFORMANT 0x3042 +#define EGL_CONTEXT_CLIENT_VERSION 0x3098 +#define EGL_MATCH_NATIVE_PIXMAP 0x3041 +#define EGL_OPENGL_ES2_BIT 0x0004 +#define EGL_VG_ALPHA_FORMAT 0x3088 +#define EGL_VG_ALPHA_FORMAT_NONPRE 0x308B +#define EGL_VG_ALPHA_FORMAT_PRE 0x308C +#define EGL_VG_ALPHA_FORMAT_PRE_BIT 0x0040 +#define EGL_VG_COLORSPACE 0x3087 +#define EGL_VG_COLORSPACE_sRGB 0x3089 +#define EGL_VG_COLORSPACE_LINEAR 0x308A +#define EGL_VG_COLORSPACE_LINEAR_BIT 0x0020 +#endif /* EGL_VERSION_1_3 */ + +#ifndef EGL_VERSION_1_4 +#define EGL_VERSION_1_4 1 +#define EGL_DEFAULT_DISPLAY ((EGLNativeDisplayType)0) +#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200 +#define EGL_MULTISAMPLE_RESOLVE 0x3099 +#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A +#define 
EGL_MULTISAMPLE_RESOLVE_BOX 0x309B +#define EGL_OPENGL_API 0x30A2 +#define EGL_OPENGL_BIT 0x0008 +#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400 +EGLAPI EGLContext EGLAPIENTRY eglGetCurrentContext (void); +#endif /* EGL_VERSION_1_4 */ + +#ifndef EGL_VERSION_1_5 +#define EGL_VERSION_1_5 1 +typedef void *EGLSync; +typedef intptr_t EGLAttrib; +typedef khronos_utime_nanoseconds_t EGLTime; +typedef void *EGLImage; +#define EGL_CONTEXT_MAJOR_VERSION 0x3098 +#define EGL_CONTEXT_MINOR_VERSION 0x30FB +#define EGL_CONTEXT_OPENGL_PROFILE_MASK 0x30FD +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY 0x31BD +#define EGL_NO_RESET_NOTIFICATION 0x31BE +#define EGL_LOSE_CONTEXT_ON_RESET 0x31BF +#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT 0x00000001 +#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT 0x00000002 +#define EGL_CONTEXT_OPENGL_DEBUG 0x31B0 +#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE 0x31B1 +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS 0x31B2 +#define EGL_OPENGL_ES3_BIT 0x00000040 +#define EGL_CL_EVENT_HANDLE 0x309C +#define EGL_SYNC_CL_EVENT 0x30FE +#define EGL_SYNC_CL_EVENT_COMPLETE 0x30FF +#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE 0x30F0 +#define EGL_SYNC_TYPE 0x30F7 +#define EGL_SYNC_STATUS 0x30F1 +#define EGL_SYNC_CONDITION 0x30F8 +#define EGL_SIGNALED 0x30F2 +#define EGL_UNSIGNALED 0x30F3 +#define EGL_SYNC_FLUSH_COMMANDS_BIT 0x0001 +#define EGL_FOREVER 0xFFFFFFFFFFFFFFFFull +#define EGL_TIMEOUT_EXPIRED 0x30F5 +#define EGL_CONDITION_SATISFIED 0x30F6 +#define EGL_NO_SYNC ((EGLSync)0) +#define EGL_SYNC_FENCE 0x30F9 +#define EGL_GL_COLORSPACE 0x309D +#define EGL_GL_COLORSPACE_SRGB 0x3089 +#define EGL_GL_COLORSPACE_LINEAR 0x308A +#define EGL_GL_RENDERBUFFER 0x30B9 +#define EGL_GL_TEXTURE_2D 0x30B1 +#define EGL_GL_TEXTURE_LEVEL 0x30BC +#define EGL_GL_TEXTURE_3D 0x30B2 +#define EGL_GL_TEXTURE_ZOFFSET 0x30BD +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x30B3 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x30B4 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x30B5 +#define 
EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x30B6 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x30B7 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x30B8 +#define EGL_IMAGE_PRESERVED 0x30D2 +#define EGL_NO_IMAGE ((EGLImage)0) +EGLAPI EGLSync EGLAPIENTRY eglCreateSync (EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySync (EGLDisplay dpy, EGLSync sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttrib (EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *value); +EGLAPI EGLImage EGLAPIENTRY eglCreateImage (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLAttrib *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImage (EGLDisplay dpy, EGLImage image); +EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplay (EGLenum platform, void *native_display, const EGLAttrib *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurface (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLAttrib *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurface (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLAttrib *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags); +#endif /* EGL_VERSION_1_5 */ #ifdef __cplusplus } #endif -#endif /* __egl_h_ */ +#endif From 6b31f22338656c154e028b4bc2cbd14ab733a957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 15 May 2015 19:58:51 +0200 Subject: [PATCH 438/834] egl: import eglext.h from registry and cleanup eglmesaext.h (v2) v2: include mesa and chromium extensions in eglext.h so as not to break existing users v3: keep PFNEGLSWAPBUFFERSREGIONNOK because piglit uses it Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- include/EGL/eglext.h | 258 +++++++++++++++++++++++++++++++++++++-- include/EGL/eglmesaext.h | 33 +---- 2 
files changed, 247 insertions(+), 44 deletions(-) diff --git a/include/EGL/eglext.h b/include/EGL/eglext.h index 88b39dbc4f7..6043b374fd2 100644 --- a/include/EGL/eglext.h +++ b/include/EGL/eglext.h @@ -6,7 +6,7 @@ extern "C" { #endif /* -** Copyright (c) 2013 The Khronos Group Inc. +** Copyright (c) 2013-2014 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -33,12 +33,12 @@ extern "C" { ** used to make the header, and the header can be found at ** http://www.opengl.org/registry/ ** -** Khronos $Revision: 24567 $ on $Date: 2013-12-18 09:50:17 -0800 (Wed, 18 Dec 2013) $ +** Khronos $Revision$ on $Date$ */ #include -#define EGL_EGLEXT_VERSION 20131218 +#define EGL_EGLEXT_VERSION 20150508 /* Generated C header for: * API: egl @@ -94,12 +94,28 @@ EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSync64KHR (EGLDisplay dpy, EGLenum type, #define EGL_OPENGL_ES3_BIT_KHR 0x00000040 #endif /* EGL_KHR_create_context */ +#ifndef EGL_KHR_create_context_no_error +#define EGL_KHR_create_context_no_error 1 +#define EGL_CONTEXT_OPENGL_NO_ERROR_KHR 0x31B3 +#endif /* EGL_KHR_create_context_no_error */ + #ifndef EGL_KHR_fence_sync #define EGL_KHR_fence_sync 1 +typedef khronos_utime_nanoseconds_t EGLTimeKHR; #ifdef KHRONOS_SUPPORT_INT64 #define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR 0x30F0 #define EGL_SYNC_CONDITION_KHR 0x30F8 #define EGL_SYNC_FENCE_KHR 0x30F9 +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNCKHRPROC) (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync); +typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR 
EGLAPIENTRY eglCreateSyncKHR (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncKHR (EGLDisplay dpy, EGLSyncKHR sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); +#endif #endif /* KHRONOS_SUPPORT_INT64 */ #endif /* EGL_KHR_fence_sync */ @@ -207,9 +223,38 @@ EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface64KHR (EGLDisplay dpy, EGLSurface s #endif #endif /* EGL_KHR_lock_surface3 */ +#ifndef EGL_KHR_partial_update +#define EGL_KHR_partial_update 1 +#define EGL_BUFFER_AGE_KHR 0x313D +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSETDAMAGEREGIONKHRPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSetDamageRegionKHR (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#endif +#endif /* EGL_KHR_partial_update */ + +#ifndef EGL_KHR_platform_android +#define EGL_KHR_platform_android 1 +#define EGL_PLATFORM_ANDROID_KHR 0x3141 +#endif /* EGL_KHR_platform_android */ + +#ifndef EGL_KHR_platform_gbm +#define EGL_KHR_platform_gbm 1 +#define EGL_PLATFORM_GBM_KHR 0x31D7 +#endif /* EGL_KHR_platform_gbm */ + +#ifndef EGL_KHR_platform_wayland +#define EGL_KHR_platform_wayland 1 +#define EGL_PLATFORM_WAYLAND_KHR 0x31D8 +#endif /* EGL_KHR_platform_wayland */ + +#ifndef EGL_KHR_platform_x11 +#define EGL_KHR_platform_x11 1 +#define EGL_PLATFORM_X11_KHR 0x31D5 +#define EGL_PLATFORM_X11_SCREEN_KHR 0x31D6 +#endif /* EGL_KHR_platform_x11 */ + #ifndef EGL_KHR_reusable_sync #define EGL_KHR_reusable_sync 1 -typedef khronos_utime_nanoseconds_t EGLTimeKHR; #ifdef KHRONOS_SUPPORT_INT64 #define EGL_SYNC_STATUS_KHR 0x30F1 #define EGL_SIGNALED_KHR 0x30F2 @@ -221,17 +266,9 @@ typedef khronos_utime_nanoseconds_t EGLTimeKHR; #define EGL_SYNC_FLUSH_COMMANDS_BIT_KHR 
0x0001 #define EGL_FOREVER_KHR 0xFFFFFFFFFFFFFFFFull #define EGL_NO_SYNC_KHR ((EGLSyncKHR)0) -typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNCKHRPROC) (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); -typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync); -typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode); -typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); #ifdef EGL_EGLEXT_PROTOTYPES -EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSyncKHR (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); -EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncKHR (EGLDisplay dpy, EGLSyncKHR sync); -EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode); -EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); #endif #endif /* KHRONOS_SUPPORT_INT64 */ #endif /* EGL_KHR_reusable_sync */ @@ -333,6 +370,14 @@ EGLAPI EGLSurface EGLAPIENTRY eglCreateStreamProducerSurfaceKHR (EGLDisplay dpy, #define EGL_KHR_surfaceless_context 1 #endif /* EGL_KHR_surfaceless_context */ +#ifndef EGL_KHR_swap_buffers_with_damage +#define EGL_KHR_swap_buffers_with_damage 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHDAMAGEKHRPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageKHR (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#endif +#endif /* EGL_KHR_swap_buffers_with_damage */ + #ifndef EGL_KHR_vg_parent_image #define EGL_KHR_vg_parent_image 1 #define 
EGL_VG_PARENT_IMAGE_KHR 0x30BA @@ -389,6 +434,12 @@ EGLAPI EGLint EGLAPIENTRY eglDupNativeFenceFDANDROID (EGLDisplay dpy, EGLSyncKHR #define EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE 0x3200 #endif /* EGL_ANGLE_d3d_share_handle_client_buffer */ +#ifndef EGL_ANGLE_device_d3d +#define EGL_ANGLE_device_d3d 1 +#define EGL_D3D9_DEVICE_ANGLE 0x33A0 +#define EGL_D3D11_DEVICE_ANGLE 0x33A1 +#endif /* EGL_ANGLE_device_d3d */ + #ifndef EGL_ANGLE_query_surface_pointer #define EGL_ANGLE_query_surface_pointer 1 typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACEPOINTERANGLEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value); @@ -401,6 +452,11 @@ EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSu #define EGL_ANGLE_surface_d3d_texture_2d_share_handle 1 #endif /* EGL_ANGLE_surface_d3d_texture_2d_share_handle */ +#ifndef EGL_ANGLE_window_fixed_size +#define EGL_ANGLE_window_fixed_size 1 +#define EGL_FIXED_SIZE_ANGLE 0x3201 +#endif /* EGL_ANGLE_window_fixed_size */ + #ifndef EGL_ARM_pixmap_multisample_discard #define EGL_ARM_pixmap_multisample_discard 1 #define EGL_DISCARD_SAMPLES_ARM 0x3286 @@ -423,6 +479,42 @@ EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSu #define EGL_LOSE_CONTEXT_ON_RESET_EXT 0x31BF #endif /* EGL_EXT_create_context_robustness */ +#ifndef EGL_EXT_device_base +#define EGL_EXT_device_base 1 +typedef void *EGLDeviceEXT; +#define EGL_NO_DEVICE_EXT ((EGLDeviceEXT)(0)) +#define EGL_BAD_DEVICE_EXT 0x322B +#define EGL_DEVICE_EXT 0x322C +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEVICEATTRIBEXTPROC) (EGLDeviceEXT device, EGLint attribute, EGLAttrib *value); +typedef const char *(EGLAPIENTRYP PFNEGLQUERYDEVICESTRINGEXTPROC) (EGLDeviceEXT device, EGLint name); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEVICESEXTPROC) (EGLint max_devices, EGLDeviceEXT *devices, EGLint *num_devices); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDISPLAYATTRIBEXTPROC) (EGLDisplay dpy, EGLint attribute, 
EGLAttrib *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQueryDeviceAttribEXT (EGLDeviceEXT device, EGLint attribute, EGLAttrib *value); +EGLAPI const char *EGLAPIENTRY eglQueryDeviceStringEXT (EGLDeviceEXT device, EGLint name); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryDevicesEXT (EGLint max_devices, EGLDeviceEXT *devices, EGLint *num_devices); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryDisplayAttribEXT (EGLDisplay dpy, EGLint attribute, EGLAttrib *value); +#endif +#endif /* EGL_EXT_device_base */ + +#ifndef EGL_EXT_device_drm +#define EGL_EXT_device_drm 1 +#define EGL_DRM_DEVICE_FILE_EXT 0x3233 +#endif /* EGL_EXT_device_drm */ + +#ifndef EGL_EXT_device_enumeration +#define EGL_EXT_device_enumeration 1 +#endif /* EGL_EXT_device_enumeration */ + +#ifndef EGL_EXT_device_openwf +#define EGL_EXT_device_openwf 1 +#define EGL_OPENWF_DEVICE_ID_EXT 0x3237 +#endif /* EGL_EXT_device_openwf */ + +#ifndef EGL_EXT_device_query +#define EGL_EXT_device_query 1 +#endif /* EGL_EXT_device_query */ + #ifndef EGL_EXT_image_dma_buf_import #define EGL_EXT_image_dma_buf_import 1 #define EGL_LINUX_DMA_BUF_EXT 0x3270 @@ -454,6 +546,48 @@ EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSu #define EGL_MULTIVIEW_VIEW_COUNT_EXT 0x3134 #endif /* EGL_EXT_multiview_window */ +#ifndef EGL_EXT_output_base +#define EGL_EXT_output_base 1 +typedef void *EGLOutputLayerEXT; +typedef void *EGLOutputPortEXT; +#define EGL_NO_OUTPUT_LAYER_EXT ((EGLOutputLayerEXT)0) +#define EGL_NO_OUTPUT_PORT_EXT ((EGLOutputPortEXT)0) +#define EGL_BAD_OUTPUT_LAYER_EXT 0x322D +#define EGL_BAD_OUTPUT_PORT_EXT 0x322E +#define EGL_SWAP_INTERVAL_EXT 0x322F +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETOUTPUTLAYERSEXTPROC) (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputLayerEXT *layers, EGLint max_layers, EGLint *num_layers); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETOUTPUTPORTSEXTPROC) (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputPortEXT *ports, EGLint 
max_ports, EGLint *num_ports); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLOUTPUTLAYERATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYOUTPUTLAYERATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib *value); +typedef const char *(EGLAPIENTRYP PFNEGLQUERYOUTPUTLAYERSTRINGEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint name); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLOUTPUTPORTATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYOUTPUTPORTATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib *value); +typedef const char *(EGLAPIENTRYP PFNEGLQUERYOUTPUTPORTSTRINGEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint name); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglGetOutputLayersEXT (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputLayerEXT *layers, EGLint max_layers, EGLint *num_layers); +EGLAPI EGLBoolean EGLAPIENTRY eglGetOutputPortsEXT (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputPortEXT *ports, EGLint max_ports, EGLint *num_ports); +EGLAPI EGLBoolean EGLAPIENTRY eglOutputLayerAttribEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryOutputLayerAttribEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib *value); +EGLAPI const char *EGLAPIENTRY eglQueryOutputLayerStringEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint name); +EGLAPI EGLBoolean EGLAPIENTRY eglOutputPortAttribEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryOutputPortAttribEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib *value); +EGLAPI const char *EGLAPIENTRY eglQueryOutputPortStringEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint name); 
+#endif +#endif /* EGL_EXT_output_base */ + +#ifndef EGL_EXT_output_drm +#define EGL_EXT_output_drm 1 +#define EGL_DRM_CRTC_EXT 0x3234 +#define EGL_DRM_PLANE_EXT 0x3235 +#define EGL_DRM_CONNECTOR_EXT 0x3236 +#endif /* EGL_EXT_output_drm */ + +#ifndef EGL_EXT_output_openwf +#define EGL_EXT_output_openwf 1 +#define EGL_OPENWF_PIPELINE_ID_EXT 0x3238 +#define EGL_OPENWF_PORT_ID_EXT 0x3239 +#endif /* EGL_EXT_output_openwf */ + #ifndef EGL_EXT_platform_base #define EGL_EXT_platform_base 1 typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETPLATFORMDISPLAYEXTPROC) (EGLenum platform, void *native_display, const EGLint *attrib_list); @@ -466,6 +600,11 @@ EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurfaceEXT (EGLDisplay dpy, #endif #endif /* EGL_EXT_platform_base */ +#ifndef EGL_EXT_platform_device +#define EGL_EXT_platform_device 1 +#define EGL_PLATFORM_DEVICE_EXT 0x313F +#endif /* EGL_EXT_platform_device */ + #ifndef EGL_EXT_platform_wayland #define EGL_EXT_platform_wayland 1 #define EGL_PLATFORM_WAYLAND_EXT 0x31D8 @@ -477,6 +616,19 @@ EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurfaceEXT (EGLDisplay dpy, #define EGL_PLATFORM_X11_SCREEN_EXT 0x31D6 #endif /* EGL_EXT_platform_x11 */ +#ifndef EGL_EXT_protected_surface +#define EGL_EXT_protected_surface 1 +#define EGL_PROTECTED_CONTENT_EXT 0x32C0 +#endif /* EGL_EXT_protected_surface */ + +#ifndef EGL_EXT_stream_consumer_egloutput +#define EGL_EXT_stream_consumer_egloutput 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMEROUTPUTEXTPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLOutputLayerEXT layer); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerOutputEXT (EGLDisplay dpy, EGLStreamKHR stream, EGLOutputLayerEXT layer); +#endif +#endif /* EGL_EXT_stream_consumer_egloutput */ + #ifndef EGL_EXT_swap_buffers_with_damage #define EGL_EXT_swap_buffers_with_damage 1 typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHDAMAGEEXTPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, 
EGLint n_rects); @@ -485,6 +637,35 @@ EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageEXT (EGLDisplay dpy, EGLSu #endif #endif /* EGL_EXT_swap_buffers_with_damage */ +#ifndef EGL_EXT_yuv_surface +#define EGL_EXT_yuv_surface 1 +#define EGL_YUV_ORDER_EXT 0x3301 +#define EGL_YUV_NUMBER_OF_PLANES_EXT 0x3311 +#define EGL_YUV_SUBSAMPLE_EXT 0x3312 +#define EGL_YUV_DEPTH_RANGE_EXT 0x3317 +#define EGL_YUV_CSC_STANDARD_EXT 0x330A +#define EGL_YUV_PLANE_BPP_EXT 0x331A +#define EGL_YUV_BUFFER_EXT 0x3300 +#define EGL_YUV_ORDER_YUV_EXT 0x3302 +#define EGL_YUV_ORDER_YVU_EXT 0x3303 +#define EGL_YUV_ORDER_YUYV_EXT 0x3304 +#define EGL_YUV_ORDER_UYVY_EXT 0x3305 +#define EGL_YUV_ORDER_YVYU_EXT 0x3306 +#define EGL_YUV_ORDER_VYUY_EXT 0x3307 +#define EGL_YUV_ORDER_AYUV_EXT 0x3308 +#define EGL_YUV_SUBSAMPLE_4_2_0_EXT 0x3313 +#define EGL_YUV_SUBSAMPLE_4_2_2_EXT 0x3314 +#define EGL_YUV_SUBSAMPLE_4_4_4_EXT 0x3315 +#define EGL_YUV_DEPTH_RANGE_LIMITED_EXT 0x3318 +#define EGL_YUV_DEPTH_RANGE_FULL_EXT 0x3319 +#define EGL_YUV_CSC_STANDARD_601_EXT 0x330B +#define EGL_YUV_CSC_STANDARD_709_EXT 0x330C +#define EGL_YUV_CSC_STANDARD_2020_EXT 0x330D +#define EGL_YUV_PLANE_BPP_0_EXT 0x331B +#define EGL_YUV_PLANE_BPP_8_EXT 0x331C +#define EGL_YUV_PLANE_BPP_10_EXT 0x331D +#endif /* EGL_EXT_yuv_surface */ + #ifndef EGL_HI_clientpixmap #define EGL_HI_clientpixmap 1 struct EGLClientPixmapHI { @@ -533,11 +714,42 @@ EGLAPI EGLBoolean EGLAPIENTRY eglExportDRMImageMESA (EGLDisplay dpy, EGLImageKHR #endif #endif /* EGL_MESA_drm_image */ +#ifndef EGL_MESA_image_dma_buf_export +#define EGL_MESA_image_dma_buf_export 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEQUERYMESAPROC) (EGLDisplay dpy, EGLImageKHR image, int *fourcc, int *num_planes, EGLuint64KHR *modifiers); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEMESAPROC) (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY 
eglExportDMABUFImageQueryMESA (EGLDisplay dpy, EGLImageKHR image, int *fourcc, int *num_planes, EGLuint64KHR *modifiers); +EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageMESA (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets); +#endif +#endif /* EGL_MESA_image_dma_buf_export */ + #ifndef EGL_MESA_platform_gbm #define EGL_MESA_platform_gbm 1 #define EGL_PLATFORM_GBM_MESA 0x31D7 #endif /* EGL_MESA_platform_gbm */ +#ifndef EGL_NOK_swap_region +#define EGL_NOK_swap_region 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegionNOK (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#endif +#endif /* EGL_NOK_swap_region */ + +#ifndef EGL_NOK_swap_region2 +#define EGL_NOK_swap_region2 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGION2NOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegion2NOK (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#endif +#endif /* EGL_NOK_swap_region2 */ + +#ifndef EGL_NOK_texture_from_pixmap +#define EGL_NOK_texture_from_pixmap 1 +#define EGL_Y_INVERTED_NOK 0x307F +#endif /* EGL_NOK_texture_from_pixmap */ + #ifndef EGL_NV_3dvision_surface #define EGL_NV_3dvision_surface 1 #define EGL_AUTO_STEREO_NV 0x3136 @@ -556,6 +768,13 @@ EGLAPI EGLBoolean EGLAPIENTRY eglExportDRMImageMESA (EGLDisplay dpy, EGLImageKHR #define EGL_COVERAGE_SAMPLE_RESOLVE_NONE_NV 0x3133 #endif /* EGL_NV_coverage_sample_resolve */ +#ifndef EGL_NV_cuda_event +#define EGL_NV_cuda_event 1 +#define EGL_CUDA_EVENT_HANDLE_NV 0x323B +#define EGL_SYNC_CUDA_EVENT_NV 0x323C +#define EGL_SYNC_CUDA_EVENT_COMPLETE_NV 0x323D +#endif /* EGL_NV_cuda_event */ + #ifndef EGL_NV_depth_nonlinear #define EGL_NV_depth_nonlinear 1 
#define EGL_DEPTH_ENCODING_NV 0x30E2 @@ -563,6 +782,11 @@ EGLAPI EGLBoolean EGLAPIENTRY eglExportDRMImageMESA (EGLDisplay dpy, EGLImageKHR #define EGL_DEPTH_ENCODING_NONLINEAR_NV 0x30E3 #endif /* EGL_NV_depth_nonlinear */ +#ifndef EGL_NV_device_cuda +#define EGL_NV_device_cuda 1 +#define EGL_CUDA_DEVICE_NV 0x323A +#endif /* EGL_NV_device_cuda */ + #ifndef EGL_NV_native_query #define EGL_NV_native_query 1 typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEDISPLAYNVPROC) (EGLDisplay dpy, EGLNativeDisplayType *display_id); @@ -645,6 +869,16 @@ EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeNV (void); #endif /* KHRONOS_SUPPORT_INT64 */ #endif /* EGL_NV_system_time */ +#ifndef EGL_TIZEN_image_native_buffer +#define EGL_TIZEN_image_native_buffer 1 +#define EGL_NATIVE_BUFFER_TIZEN 0x32A0 +#endif /* EGL_TIZEN_image_native_buffer */ + +#ifndef EGL_TIZEN_image_native_surface +#define EGL_TIZEN_image_native_surface 1 +#define EGL_NATIVE_SURFACE_TIZEN 0x32A1 +#endif /* EGL_TIZEN_image_native_surface */ + #include #include diff --git a/include/EGL/eglmesaext.h b/include/EGL/eglmesaext.h index 87748cadbee..917a2043c77 100644 --- a/include/EGL/eglmesaext.h +++ b/include/EGL/eglmesaext.h @@ -87,45 +87,14 @@ typedef struct wl_buffer * (EGLAPIENTRYP PFNEGLCREATEWAYLANDBUFFERFROMIMAGEWL) ( #endif -#ifndef EGL_NOK_swap_region -#define EGL_NOK_swap_region 1 - -#ifdef EGL_EGLEXT_PROTOTYPES -EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint* rects); -#endif - +/* remnant of EGL_NOK_swap_region kept for compatibility because of a non-standard type name */ typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOK) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint* rects); -#endif - -#ifndef EGL_NOK_texture_from_pixmap -#define EGL_NOK_texture_from_pixmap 1 - -#define EGL_Y_INVERTED_NOK 0x307F -#endif /* EGL_NOK_texture_from_pixmap */ - -#ifndef EGL_ANDROID_image_native_buffer -#define 
EGL_ANDROID_image_native_buffer 1 -#define EGL_NATIVE_BUFFER_ANDROID 0x3140 /* eglCreateImageKHR target */ -#endif #ifndef EGL_MESA_configless_context #define EGL_MESA_configless_context 1 #define EGL_NO_CONFIG_MESA ((EGLConfig)0) #endif -#if KHRONOS_SUPPORT_INT64 -#ifndef EGL_MESA_image_dma_buf_export -#define EGL_MESA_image_dma_buf_export 1 -#ifdef EGL_EGLEXT_PROTOTYPES -EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageQueryMESA (EGLDisplay dpy, EGLImageKHR image, EGLint *fourcc, EGLint *nplanes, EGLuint64KHR *modifiers); -EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageMESA (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets); -#endif -#endif - -typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEQUERYMESA) (EGLDisplay dpy, EGLImageKHR image, EGLint *fourcc, EGLint *nplanes, EGLuint64KHR *modifiers); -typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEMESA) (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets); - -#endif #ifdef __cplusplus } #endif From 20249d355989668bfdcfed61708a6959794b9710 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 15 May 2015 19:59:59 +0200 Subject: [PATCH 439/834] egl: import platform headers from registry (v2) v2: don't remove local Mesa changes Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- include/EGL/eglplatform.h | 19 +++++++++---------- include/KHR/khrplatform.h | 19 ++++++++++++++++--- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/include/EGL/eglplatform.h b/include/EGL/eglplatform.h index 2eb6865905c..7802542ad0f 100644 --- a/include/EGL/eglplatform.h +++ b/include/EGL/eglplatform.h @@ -2,7 +2,7 @@ #define __eglplatform_h_ /* -** Copyright (c) 2007-2009 The Khronos Group Inc. +** Copyright (c) 2007-2013 The Khronos Group Inc. 
** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -25,7 +25,7 @@ */ /* Platform-specific types and definitions for egl.h - * $Revision: 12306 $ on $Date: 2010-08-25 09:51:28 -0700 (Wed, 25 Aug 2010) $ + * $Revision: 30994 $ on $Date: 2015-04-30 13:36:48 -0700 (Thu, 30 Apr 2015) $ * * Adopters may modify khrplatform.h and this file to suit their platform. * You are encouraged to submit all modifications to the Khronos group so that @@ -77,7 +77,7 @@ typedef HDC EGLNativeDisplayType; typedef HBITMAP EGLNativePixmapType; typedef HWND EGLNativeWindowType; -#elif defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */ +#elif defined(__APPLE__) || defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */ typedef int EGLNativeDisplayType; typedef void *EGLNativeWindowType; @@ -95,14 +95,15 @@ typedef struct gbm_device *EGLNativeDisplayType; typedef struct gbm_bo *EGLNativePixmapType; typedef void *EGLNativeWindowType; -#elif defined(ANDROID) /* Android */ +#elif defined(__ANDROID__) || defined(ANDROID) + +#include -struct ANativeWindow; struct egl_native_pixmap_t; -typedef struct ANativeWindow *EGLNativeWindowType; -typedef struct egl_native_pixmap_t *EGLNativePixmapType; -typedef void *EGLNativeDisplayType; +typedef struct ANativeWindow* EGLNativeWindowType; +typedef struct egl_native_pixmap_t* EGLNativePixmapType; +typedef void* EGLNativeDisplayType; #elif defined(__unix__) @@ -131,9 +132,7 @@ typedef khronos_uintptr_t EGLNativePixmapType; typedef khronos_uintptr_t EGLNativeWindowType; #else - #error "Platform not recognized" - #endif /* EGL 1.2 types, renamed for consistency in EGL 1.3 */ diff --git a/include/KHR/khrplatform.h b/include/KHR/khrplatform.h index 447953940e6..790de44b8f6 100644 --- a/include/KHR/khrplatform.h +++ b/include/KHR/khrplatform.h @@ -26,7 +26,7 @@ /* Khronos platform-specific types and definitions. 
* - * $Revision: 9356 $ on $Date: 2009-10-21 02:52:25 -0700 (Wed, 21 Oct 2009) $ + * $Revision: 23298 $ on $Date: 2013-09-30 17:07:13 -0700 (Mon, 30 Sep 2013) $ * * Adopters may modify this file to suit their platform. Adopters are * encouraged to submit platform specific modifications to the Khronos @@ -106,9 +106,9 @@ #elif defined (__SYMBIAN32__) # define KHRONOS_APICALL IMPORT_C #elif (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \ - || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) + || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) /* KHRONOS_APIATTRIBUTES is not used by the client API headers yet */ -# define KHRONOS_APICALL __attribute__((visibility("default"))) +# define KHRONOS_APICALL __attribute__((visibility("default"))) #else # define KHRONOS_APICALL #endif @@ -229,10 +229,23 @@ typedef signed char khronos_int8_t; typedef unsigned char khronos_uint8_t; typedef signed short int khronos_int16_t; typedef unsigned short int khronos_uint16_t; + +/* + * Types that differ between LLP64 and LP64 architectures - in LLP64, + * pointers are 64 bits, but 'long' is still 32 bits. Win64 appears + * to be the only LLP64 architecture in current use. 
+ */ +#ifdef _WIN64 +typedef signed long long int khronos_intptr_t; +typedef unsigned long long int khronos_uintptr_t; +typedef signed long long int khronos_ssize_t; +typedef unsigned long long int khronos_usize_t; +#else typedef signed long int khronos_intptr_t; typedef unsigned long int khronos_uintptr_t; typedef signed long int khronos_ssize_t; typedef unsigned long int khronos_usize_t; +#endif #if KHRONOS_SUPPORT_FLOAT /* From 3a83adeb7c6340104e9417beefc086f7d33183bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 May 2015 21:38:55 +0200 Subject: [PATCH 440/834] egl: remove unused _egl_global::ClientExtensions Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- src/egl/main/eglglobals.c | 10 ---------- src/egl/main/eglglobals.h | 9 --------- 2 files changed, 19 deletions(-) diff --git a/src/egl/main/eglglobals.c b/src/egl/main/eglglobals.c index 129bf29f1e9..884cff0c36b 100644 --- a/src/egl/main/eglglobals.c +++ b/src/egl/main/eglglobals.c @@ -50,16 +50,6 @@ struct _egl_global _eglGlobal = _eglFiniDisplay }, - /* ClientExtensions */ - { - true, /* EGL_EXT_client_extensions */ - true, /* EGL_EXT_platform_base */ - true, /* EGL_EXT_platform_x11 */ - true, /* EGL_EXT_platform_wayland */ - true, /* EGL_MESA_platform_gbm */ - true, /* EGL_KHR_client_get_all_proc_addresses */ - }, - /* ClientExtensionsString */ "EGL_EXT_client_extensions" " EGL_EXT_platform_base" diff --git a/src/egl/main/eglglobals.h b/src/egl/main/eglglobals.h index 04b96099a3b..ae1b75b4545 100644 --- a/src/egl/main/eglglobals.h +++ b/src/egl/main/eglglobals.h @@ -50,15 +50,6 @@ struct _egl_global EGLint NumAtExitCalls; void (*AtExitCalls[10])(void); - struct _egl_client_extensions { - bool EXT_client_extensions; - bool EXT_platform_base; - bool EXT_platform_x11; - bool EXT_platform_wayland; - bool MESA_platform_gbm; - bool KHR_get_all_proc_addresses; - } ClientExtensions; - const char *ClientExtensionString; }; From efda9c56491f5cb90e77f5fe7979477fc9b2b529 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 May 2015 22:16:52 +0200 Subject: [PATCH 441/834] egl: set the EGL version in common code Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- src/egl/drivers/dri2/platform_android.c | 4 ---- src/egl/drivers/dri2/platform_drm.c | 4 ---- src/egl/drivers/dri2/platform_wayland.c | 8 -------- src/egl/drivers/dri2/platform_x11.c | 8 -------- src/egl/main/eglapi.c | 7 +++++++ 5 files changed, 7 insertions(+), 24 deletions(-) diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index f4825261bad..fed3073088a 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -707,10 +707,6 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy) dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE; dpy->Extensions.KHR_image_base = EGL_TRUE; - /* we're supporting EGL 1.4 */ - dpy->VersionMajor = 1; - dpy->VersionMinor = 4; - /* Fill vtbl last to prevent accidentally calling virtual function during * initialization. */ diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c index 486b0030dcd..3391afc635c 100644 --- a/src/egl/drivers/dri2/platform_drm.c +++ b/src/egl/drivers/dri2/platform_drm.c @@ -715,10 +715,6 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) } #endif - /* we're supporting EGL 1.4 */ - disp->VersionMajor = 1; - disp->VersionMinor = 4; - /* Fill vtbl last to prevent accidentally calling virtual function during * initialization. 
*/ diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 9914b687fb8..ea2f9f23b96 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -1206,10 +1206,6 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp) disp->Extensions.EXT_swap_buffers_with_damage = EGL_TRUE; - /* we're supporting EGL 1.4 */ - disp->VersionMajor = 1; - disp->VersionMinor = 4; - /* Fill vtbl last to prevent accidentally calling virtual function during * initialization. */ @@ -1853,10 +1849,6 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp) dri2_add_config(disp, config, i + 1, types, NULL, rgb565_masks); } - /* we're supporting EGL 1.4 */ - disp->VersionMajor = 1; - disp->VersionMinor = 4; - /* Fill vtbl last to prevent accidentally calling virtual function during * initialization. */ diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index 300072d6d92..e0d0fdc8205 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -1129,10 +1129,6 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp) goto cleanup_configs; } - /* we're supporting EGL 1.4 */ - disp->VersionMajor = 1; - disp->VersionMinor = 4; - /* Fill vtbl last to prevent accidentally calling virtual function during * initialization. */ @@ -1303,10 +1299,6 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp) goto cleanup_configs; } - /* we're supporting EGL 1.4 */ - disp->VersionMajor = 1; - disp->VersionMinor = 4; - /* Fill vtbl last to prevent accidentally calling virtual function during * initialization. 
*/ diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index fbb14f1524a..9a17f8d31f2 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -407,6 +407,12 @@ _eglCreateAPIsString(_EGLDisplay *dpy) assert(strlen(dpy->ClientAPIsString) < sizeof(dpy->ClientAPIsString)); } +static void +_eglComputeVersion(_EGLDisplay *disp) +{ + disp->VersionMajor = 1; + disp->VersionMinor = 4; +} /** * This is typically the second EGL function that an application calls. @@ -444,6 +450,7 @@ eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor) */ disp->Extensions.KHR_get_all_proc_addresses = EGL_TRUE; + _eglComputeVersion(disp); _eglCreateExtensionsString(disp); _eglCreateAPIsString(disp); _eglsnprintf(disp->VersionString, sizeof(disp->VersionString), From 0e4b564ef288159f16f7a6886b6cfc0110411af8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 12 May 2015 16:40:29 +0200 Subject: [PATCH 442/834] egl: combine VersionMajor and VersionMinor into one variable Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- src/egl/main/eglapi.c | 9 ++++----- src/egl/main/egldisplay.h | 3 +-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 9a17f8d31f2..81be6f9943b 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -410,8 +410,7 @@ _eglCreateAPIsString(_EGLDisplay *dpy) static void _eglComputeVersion(_EGLDisplay *disp) { - disp->VersionMajor = 1; - disp->VersionMinor = 4; + disp->Version = 14; } /** @@ -454,14 +453,14 @@ eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor) _eglCreateExtensionsString(disp); _eglCreateAPIsString(disp); _eglsnprintf(disp->VersionString, sizeof(disp->VersionString), - "%d.%d (%s)", disp->VersionMajor, disp->VersionMinor, + "%d.%d (%s)", disp->Version / 10, disp->Version % 10, disp->Driver->Name); } /* Update applications version of major and minor if not NULL */ if ((major != NULL) && (minor != NULL)) { - *major = 
disp->VersionMajor; - *minor = disp->VersionMinor; + *major = disp->Version / 10; + *minor = disp->Version % 10; } RETURN_EGL_SUCCESS(disp, EGL_TRUE); diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index bb394ec2962..bc8cc7d6319 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -153,8 +153,7 @@ struct _egl_display /* these fields are set by the driver during init */ void *DriverData; /**< Driver private data */ - EGLint VersionMajor; /**< EGL major version */ - EGLint VersionMinor; /**< EGL minor version */ + EGLint Version; /**< EGL version major*10+minor */ EGLint ClientAPIs; /**< Bitmask of APIs supported (EGL_xxx_BIT) */ _EGLExtensions Extensions; /**< Extensions supported */ From f9f894447e4e7442d5dfa489bb43f2823e2fc71d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 12 May 2015 00:44:20 +0200 Subject: [PATCH 443/834] egl: fix setting context flags Cc: 10.6 10.5 10.4 Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- src/egl/main/eglcontext.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/egl/main/eglcontext.c b/src/egl/main/eglcontext.c index 514b91aeef2..e50b8fb55f6 100644 --- a/src/egl/main/eglcontext.c +++ b/src/egl/main/eglcontext.c @@ -131,7 +131,7 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy, break; } - ctx->Flags = val; + ctx->Flags |= val; break; case EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR: @@ -194,7 +194,8 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy, break; } - ctx->Flags = EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR; + if (val == EGL_TRUE) + ctx->Flags |= EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR; break; default: From 706466f4619b76f2475120f187c34d01ab5aa727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 12 May 2015 00:44:56 +0200 Subject: [PATCH 444/834] egl: add context attribs from EGL 1.5 Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- src/egl/main/eglcontext.c | 30 
++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/egl/main/eglcontext.c b/src/egl/main/eglcontext.c index e50b8fb55f6..e767f4b1abe 100644 --- a/src/egl/main/eglcontext.c +++ b/src/egl/main/eglcontext.c @@ -198,6 +198,36 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy, ctx->Flags |= EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR; break; + case EGL_CONTEXT_OPENGL_ROBUST_ACCESS: + if (dpy->Version < 15) { + err = EGL_BAD_ATTRIBUTE; + break; + } + + if (val == EGL_TRUE) + ctx->Flags |= EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR; + break; + + case EGL_CONTEXT_OPENGL_DEBUG: + if (dpy->Version < 15) { + err = EGL_BAD_ATTRIBUTE; + break; + } + + if (val == EGL_TRUE) + ctx->Flags |= EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR; + break; + + case EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE: + if (dpy->Version < 15) { + err = EGL_BAD_ATTRIBUTE; + break; + } + + if (val == EGL_TRUE) + ctx->Flags |= EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR; + break; + default: err = EGL_BAD_ATTRIBUTE; break; From d333d30632516b1fc5b60181c2c237653e55a8e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 12 May 2015 17:34:57 +0200 Subject: [PATCH 445/834] egl: use EGL 1.5 types without suffixes Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- src/egl/drivers/dri2/egl_dri2.c | 4 ++-- src/egl/main/eglapi.c | 40 ++++++++++++++++----------------- src/egl/main/eglapi.h | 4 ++-- src/egl/main/eglimage.h | 10 ++++----- src/egl/main/eglsync.c | 4 ++-- src/egl/main/eglsync.h | 14 ++++++------ 6 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 8b915ef54d0..82f8843e001 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -2216,7 +2216,7 @@ dri2_egl_unref_sync(struct dri2_egl_display *dri2_dpy, static _EGLSync * dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy, EGLenum type, const EGLint *attrib_list, - const EGLAttribKHR 
*attrib_list64) + const EGLAttrib *attrib_list64) { _EGLContext *ctx = _eglGetCurrentContext(); struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy); @@ -2282,7 +2282,7 @@ dri2_destroy_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync) static EGLint dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, - EGLint flags, EGLTimeKHR timeout) + EGLint flags, EGLTime timeout) { _EGLContext *ctx = _eglGetCurrentContext(); struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy); diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 81be6f9943b..96d97176372 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1226,7 +1226,7 @@ eglReleaseThread(void) } -static EGLImageKHR EGLAPIENTRY +static EGLImage EGLAPIENTRY eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attr_list) { @@ -1234,7 +1234,7 @@ eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target, _EGLContext *context = _eglLookupContext(ctx, disp); _EGLDriver *drv; _EGLImage *img; - EGLImageKHR ret; + EGLImage ret; _EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv); if (!disp->Extensions.KHR_image_base) @@ -1256,7 +1256,7 @@ eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target, static EGLBoolean EGLAPIENTRY -eglDestroyImageKHR(EGLDisplay dpy, EGLImageKHR image) +eglDestroyImageKHR(EGLDisplay dpy, EGLImage image) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLImage *img = _eglLookupImage(image, disp); @@ -1276,15 +1276,15 @@ eglDestroyImageKHR(EGLDisplay dpy, EGLImageKHR image) } -static EGLSyncKHR +static EGLSync _eglCreateSync(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list, - const EGLAttribKHR *attrib_list64, EGLBoolean is64) + const EGLAttrib *attrib_list64, EGLBoolean is64) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLContext *ctx = _eglGetCurrentContext(); _EGLDriver *drv; _EGLSync *sync; - EGLSyncKHR ret; + EGLSync ret; _EGL_CHECK_DISPLAY(disp, EGL_NO_SYNC_KHR, drv); @@ 
-1320,22 +1320,22 @@ _eglCreateSync(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list, } -static EGLSyncKHR EGLAPIENTRY +static EGLSync EGLAPIENTRY eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list) { return _eglCreateSync(dpy, type, attrib_list, NULL, EGL_FALSE); } -static EGLSyncKHR EGLAPIENTRY -eglCreateSync64KHR(EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list) +static EGLSync EGLAPIENTRY +eglCreateSync64KHR(EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list) { return _eglCreateSync(dpy, type, NULL, attrib_list, EGL_TRUE); } static EGLBoolean EGLAPIENTRY -eglDestroySyncKHR(EGLDisplay dpy, EGLSyncKHR sync) +eglDestroySyncKHR(EGLDisplay dpy, EGLSync sync) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLSync *s = _eglLookupSync(sync, disp); @@ -1354,7 +1354,7 @@ eglDestroySyncKHR(EGLDisplay dpy, EGLSyncKHR sync) static EGLint EGLAPIENTRY -eglClientWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout) +eglClientWaitSyncKHR(EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLSync *s = _eglLookupSync(sync, disp); @@ -1375,7 +1375,7 @@ eglClientWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR t static EGLint EGLAPIENTRY -eglWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags) +eglWaitSyncKHR(EGLDisplay dpy, EGLSync sync, EGLint flags) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLSync *s = _eglLookupSync(sync, disp); @@ -1401,7 +1401,7 @@ eglWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags) static EGLBoolean EGLAPIENTRY -eglSignalSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode) +eglSignalSyncKHR(EGLDisplay dpy, EGLSync sync, EGLenum mode) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLSync *s = _eglLookupSync(sync, disp); @@ -1417,7 +1417,7 @@ eglSignalSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode) static EGLBoolean EGLAPIENTRY -eglGetSyncAttribKHR(EGLDisplay dpy, EGLSyncKHR sync, 
EGLint attribute, EGLint *value) +eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *value) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLSync *s = _eglLookupSync(sync, disp); @@ -1465,13 +1465,13 @@ eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface, #ifdef EGL_MESA_drm_image -static EGLImageKHR EGLAPIENTRY +static EGLImage EGLAPIENTRY eglCreateDRMImageMESA(EGLDisplay dpy, const EGLint *attr_list) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLDriver *drv; _EGLImage *img; - EGLImageKHR ret; + EGLImage ret; _EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv); if (!disp->Extensions.MESA_drm_image) @@ -1484,7 +1484,7 @@ eglCreateDRMImageMESA(EGLDisplay dpy, const EGLint *attr_list) } static EGLBoolean EGLAPIENTRY -eglExportDRMImageMESA(EGLDisplay dpy, EGLImageKHR image, +eglExportDRMImageMESA(EGLDisplay dpy, EGLImage image, EGLint *name, EGLint *handle, EGLint *stride) { _EGLDisplay *disp = _eglLockDisplay(dpy); @@ -1566,7 +1566,7 @@ eglQueryWaylandBufferWL(EGLDisplay dpy, struct wl_resource *buffer, #ifdef EGL_WL_create_wayland_buffer_from_image static struct wl_buffer * EGLAPIENTRY -eglCreateWaylandBufferFromImageWL(EGLDisplay dpy, EGLImageKHR image) +eglCreateWaylandBufferFromImageWL(EGLDisplay dpy, EGLImage image) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLImage *img; @@ -1630,7 +1630,7 @@ eglGetSyncValuesCHROMIUM(EGLDisplay display, EGLSurface surface, #ifdef EGL_MESA_image_dma_buf_export static EGLBoolean EGLAPIENTRY -eglExportDMABUFImageQueryMESA(EGLDisplay dpy, EGLImageKHR image, +eglExportDMABUFImageQueryMESA(EGLDisplay dpy, EGLImage image, EGLint *fourcc, EGLint *nplanes, EGLuint64KHR *modifiers) { @@ -1652,7 +1652,7 @@ eglExportDMABUFImageQueryMESA(EGLDisplay dpy, EGLImageKHR image, } static EGLBoolean EGLAPIENTRY -eglExportDMABUFImageMESA(EGLDisplay dpy, EGLImageKHR image, +eglExportDMABUFImageMESA(EGLDisplay dpy, EGLImage image, int *fds, EGLint *strides, EGLint *offsets) { _EGLDisplay *disp = _eglLockDisplay(dpy); 
diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h index e07970f8b1c..d2b2eb7b264 100644 --- a/src/egl/main/eglapi.h +++ b/src/egl/main/eglapi.h @@ -91,9 +91,9 @@ typedef _EGLImage *(*CreateImageKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLCo typedef EGLBoolean (*DestroyImageKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLImage *image); -typedef _EGLSync *(*CreateSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLenum type, const EGLint *attrib_list, const EGLAttribKHR *attrib_list64); +typedef _EGLSync *(*CreateSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLenum type, const EGLint *attrib_list, const EGLAttrib *attrib_list64); typedef EGLBoolean (*DestroySyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync); -typedef EGLint (*ClientWaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint flags, EGLTimeKHR timeout); +typedef EGLint (*ClientWaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint flags, EGLTime timeout); typedef EGLint (*WaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync); typedef EGLBoolean (*SignalSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLenum mode); typedef EGLBoolean (*GetSyncAttribKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint attribute, EGLint *value); diff --git a/src/egl/main/eglimage.h b/src/egl/main/eglimage.h index d06f91cd1a4..0dd5e120ad7 100644 --- a/src/egl/main/eglimage.h +++ b/src/egl/main/eglimage.h @@ -121,11 +121,11 @@ _eglPutImage(_EGLImage *img) * Link an image to its display and return the handle of the link. * The handle can be passed to client directly. */ -static inline EGLImageKHR +static inline EGLImage _eglLinkImage(_EGLImage *img) { _eglLinkResource(&img->Resource, _EGL_RESOURCE_IMAGE); - return (EGLImageKHR) img; + return (EGLImage) img; } @@ -145,7 +145,7 @@ _eglUnlinkImage(_EGLImage *img) * Return NULL if the handle has no corresponding linked image. 
*/ static inline _EGLImage * -_eglLookupImage(EGLImageKHR image, _EGLDisplay *dpy) +_eglLookupImage(EGLImage image, _EGLDisplay *dpy) { _EGLImage *img = (_EGLImage *) image; if (!dpy || !_eglCheckResource((void *) img, _EGL_RESOURCE_IMAGE, dpy)) @@ -157,12 +157,12 @@ _eglLookupImage(EGLImageKHR image, _EGLDisplay *dpy) /** * Return the handle of a linked image, or EGL_NO_IMAGE_KHR. */ -static inline EGLImageKHR +static inline EGLImage _eglGetImageHandle(_EGLImage *img) { _EGLResource *res = (_EGLResource *) img; return (res && _eglIsResourceLinked(res)) ? - (EGLImageKHR) img : EGL_NO_IMAGE_KHR; + (EGLImage) img : EGL_NO_IMAGE_KHR; } diff --git a/src/egl/main/eglsync.c b/src/egl/main/eglsync.c index 8b8ab16b0d2..205cdc04581 100644 --- a/src/egl/main/eglsync.c +++ b/src/egl/main/eglsync.c @@ -67,7 +67,7 @@ _eglParseSyncAttribList(_EGLSync *sync, const EGLint *attrib_list) static EGLint -_eglParseSyncAttribList64(_EGLSync *sync, const EGLAttribKHR *attrib_list) +_eglParseSyncAttribList64(_EGLSync *sync, const EGLAttrib *attrib_list) { EGLint i, err = EGL_SUCCESS; @@ -103,7 +103,7 @@ _eglParseSyncAttribList64(_EGLSync *sync, const EGLAttribKHR *attrib_list) EGLBoolean _eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum type, - const EGLint *attrib_list, const EGLAttribKHR *attrib_list64) + const EGLint *attrib_list, const EGLAttrib *attrib_list64) { EGLint err; diff --git a/src/egl/main/eglsync.h b/src/egl/main/eglsync.h index 1d2eb11a7a0..4959cf07482 100644 --- a/src/egl/main/eglsync.h +++ b/src/egl/main/eglsync.h @@ -47,13 +47,13 @@ struct _egl_sync EGLenum Type; EGLenum SyncStatus; EGLenum SyncCondition; - EGLAttribKHR CLEvent; + EGLAttrib CLEvent; }; extern EGLBoolean _eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum type, - const EGLint *attrib_list, const EGLAttribKHR *attrib_list64); + const EGLint *attrib_list, const EGLAttrib *attrib_list64); extern EGLBoolean @@ -87,11 +87,11 @@ _eglPutSync(_EGLSync *sync) * Link a sync to its display and return the 
handle of the link. * The handle can be passed to client directly. */ -static inline EGLSyncKHR +static inline EGLSync _eglLinkSync(_EGLSync *sync) { _eglLinkResource(&sync->Resource, _EGL_RESOURCE_SYNC); - return (EGLSyncKHR) sync; + return (EGLSync) sync; } @@ -110,7 +110,7 @@ _eglUnlinkSync(_EGLSync *sync) * Return NULL if the handle has no corresponding linked sync. */ static inline _EGLSync * -_eglLookupSync(EGLSyncKHR handle, _EGLDisplay *dpy) +_eglLookupSync(EGLSync handle, _EGLDisplay *dpy) { _EGLSync *sync = (_EGLSync *) handle; if (!dpy || !_eglCheckResource((void *) sync, _EGL_RESOURCE_SYNC, dpy)) @@ -122,12 +122,12 @@ _eglLookupSync(EGLSyncKHR handle, _EGLDisplay *dpy) /** * Return the handle of a linked sync, or EGL_NO_SYNC_KHR. */ -static inline EGLSyncKHR +static inline EGLSync _eglGetSyncHandle(_EGLSync *sync) { _EGLResource *res = (_EGLResource *) sync; return (res && _eglIsResourceLinked(res)) ? - (EGLSyncKHR) sync : EGL_NO_SYNC_KHR; + (EGLSync) sync : EGL_NO_SYNC_KHR; } From 2885ba0e4cea102d77832e2af4b212d00ab5edd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 12 May 2015 20:54:22 +0200 Subject: [PATCH 446/834] egl: add EGL 1.5 functions that don't need any changes from extensions Declare the functions without the suffix, so that the core names are exported. 
Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- src/egl/main/eglapi.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 96d97176372..7a1969f3014 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1255,8 +1255,8 @@ eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target, } -static EGLBoolean EGLAPIENTRY -eglDestroyImageKHR(EGLDisplay dpy, EGLImage image) +EGLBoolean EGLAPIENTRY +eglDestroyImage(EGLDisplay dpy, EGLImage image) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLImage *img = _eglLookupImage(image, disp); @@ -1327,15 +1327,15 @@ eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list) } -static EGLSync EGLAPIENTRY -eglCreateSync64KHR(EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list) +EGLSync EGLAPIENTRY +eglCreateSync(EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list) { return _eglCreateSync(dpy, type, NULL, attrib_list, EGL_TRUE); } -static EGLBoolean EGLAPIENTRY -eglDestroySyncKHR(EGLDisplay dpy, EGLSync sync) +EGLBoolean EGLAPIENTRY +eglDestroySync(EGLDisplay dpy, EGLSync sync) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLSync *s = _eglLookupSync(sync, disp); @@ -1353,8 +1353,8 @@ eglDestroySyncKHR(EGLDisplay dpy, EGLSync sync) } -static EGLint EGLAPIENTRY -eglClientWaitSyncKHR(EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout) +EGLint EGLAPIENTRY +eglClientWaitSync(EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLSync *s = _eglLookupSync(sync, disp); @@ -1717,15 +1717,19 @@ eglGetProcAddress(const char *procname) { "eglWaitClient", (_EGLProc) eglWaitClient }, { "eglWaitGL", (_EGLProc) eglWaitGL }, { "eglWaitNative", (_EGLProc) eglWaitNative }, + { "eglCreateSync", (_EGLProc) eglCreateSync }, + { "eglDestroySync", (_EGLProc) eglDestroySync }, + { "eglClientWaitSync", (_EGLProc) eglClientWaitSync }, + { 
"eglDestroyImage", (_EGLProc) eglDestroyImage }, #ifdef EGL_MESA_drm_display { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA }, #endif { "eglCreateImageKHR", (_EGLProc) eglCreateImageKHR }, - { "eglDestroyImageKHR", (_EGLProc) eglDestroyImageKHR }, + { "eglDestroyImageKHR", (_EGLProc) eglDestroyImage }, { "eglCreateSyncKHR", (_EGLProc) eglCreateSyncKHR }, - { "eglCreateSync64KHR", (_EGLProc) eglCreateSync64KHR }, - { "eglDestroySyncKHR", (_EGLProc) eglDestroySyncKHR }, - { "eglClientWaitSyncKHR", (_EGLProc) eglClientWaitSyncKHR }, + { "eglCreateSync64KHR", (_EGLProc) eglCreateSync }, + { "eglDestroySyncKHR", (_EGLProc) eglDestroySync }, + { "eglClientWaitSyncKHR", (_EGLProc) eglClientWaitSync }, { "eglWaitSyncKHR", (_EGLProc) eglWaitSyncKHR }, { "eglSignalSyncKHR", (_EGLProc) eglSignalSyncKHR }, { "eglGetSyncAttribKHR", (_EGLProc) eglGetSyncAttribKHR }, From 7524592da6305d52e95d718691d5a6665738aade Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 12 May 2015 18:13:31 +0200 Subject: [PATCH 447/834] egl: add eglWaitSync Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- src/egl/main/eglapi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 7a1969f3014..03a55f11c07 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1400,6 +1400,17 @@ eglWaitSyncKHR(EGLDisplay dpy, EGLSync sync, EGLint flags) } +EGLBoolean EGLAPIENTRY +eglWaitSync(EGLDisplay dpy, EGLSync sync, EGLint flags) +{ + /* The KHR version returns EGLint, while the core version returns + * EGLBoolean. In both cases, the return values can only be EGL_FALSE and + * EGL_TRUE. 
+ */ + return eglWaitSyncKHR(dpy, sync, flags); +} + + static EGLBoolean EGLAPIENTRY eglSignalSyncKHR(EGLDisplay dpy, EGLSync sync, EGLenum mode) { @@ -1720,6 +1731,7 @@ eglGetProcAddress(const char *procname) { "eglCreateSync", (_EGLProc) eglCreateSync }, { "eglDestroySync", (_EGLProc) eglDestroySync }, { "eglClientWaitSync", (_EGLProc) eglClientWaitSync }, + { "eglWaitSync", (_EGLProc) eglWaitSync }, { "eglDestroyImage", (_EGLProc) eglDestroyImage }, #ifdef EGL_MESA_drm_display { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA }, From 1e79e054e7dff0c45538fff1257e1f81a206d7c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 12 May 2015 18:14:31 +0200 Subject: [PATCH 448/834] egl: add eglGetSyncAttrib (v2) v2: - don't modify "value" in eglGetSyncAttribKHR after an error - rename _egl_api::GetSyncAttribKHR -> GetSyncAttrib - rename GetSyncAttribKHR_t -> GetSyncAttrib_t - rename _eglGetSyncAttribKHR to _eglGetSyncAttrib Reviewed-by: Chad Versace --- src/egl/main/eglapi.c | 25 ++++++++++++++++++++++--- src/egl/main/eglapi.h | 4 ++-- src/egl/main/eglfallbacks.c | 2 +- src/egl/main/eglsync.c | 4 ++-- src/egl/main/eglsync.h | 4 ++-- 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 03a55f11c07..96968694ac5 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1427,8 +1427,8 @@ eglSignalSyncKHR(EGLDisplay dpy, EGLSync sync, EGLenum mode) } -static EGLBoolean EGLAPIENTRY -eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *value) +EGLBoolean EGLAPIENTRY +eglGetSyncAttrib(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *value) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLSync *s = _eglLookupSync(sync, disp); @@ -1438,12 +1438,30 @@ eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *valu _EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv); assert(disp->Extensions.KHR_reusable_sync || 
disp->Extensions.KHR_fence_sync); - ret = drv->API.GetSyncAttribKHR(drv, disp, s, attribute, value); + ret = drv->API.GetSyncAttrib(drv, disp, s, attribute, value); RETURN_EGL_EVAL(disp, ret); } +static EGLBoolean EGLAPIENTRY +eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *value) +{ + EGLAttrib attrib = *value; + EGLBoolean result = eglGetSyncAttrib(dpy, sync, attribute, &attrib); + + /* The EGL_KHR_fence_sync spec says this about eglGetSyncAttribKHR: + * + * If any error occurs, <*value> is not modified. + */ + if (result == EGL_FALSE) + return result; + + *value = attrib; + return result; +} + + #ifdef EGL_NOK_swap_region static EGLBoolean EGLAPIENTRY @@ -1731,6 +1749,7 @@ eglGetProcAddress(const char *procname) { "eglCreateSync", (_EGLProc) eglCreateSync }, { "eglDestroySync", (_EGLProc) eglDestroySync }, { "eglClientWaitSync", (_EGLProc) eglClientWaitSync }, + { "eglGetSyncAttrib", (_EGLProc) eglGetSyncAttrib }, { "eglWaitSync", (_EGLProc) eglWaitSync }, { "eglDestroyImage", (_EGLProc) eglDestroyImage }, #ifdef EGL_MESA_drm_display diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h index d2b2eb7b264..4e0378d0d5f 100644 --- a/src/egl/main/eglapi.h +++ b/src/egl/main/eglapi.h @@ -96,7 +96,7 @@ typedef EGLBoolean (*DestroySyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSy typedef EGLint (*ClientWaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint flags, EGLTime timeout); typedef EGLint (*WaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync); typedef EGLBoolean (*SignalSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLenum mode); -typedef EGLBoolean (*GetSyncAttribKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint attribute, EGLint *value); +typedef EGLBoolean (*GetSyncAttrib_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint attribute, EGLAttrib *value); #ifdef EGL_NOK_swap_region @@ -178,7 +178,7 @@ struct _egl_api ClientWaitSyncKHR_t ClientWaitSyncKHR; 
WaitSyncKHR_t WaitSyncKHR; SignalSyncKHR_t SignalSyncKHR; - GetSyncAttribKHR_t GetSyncAttribKHR; + GetSyncAttrib_t GetSyncAttrib; #ifdef EGL_NOK_swap_region SwapBuffersRegionNOK_t SwapBuffersRegionNOK; diff --git a/src/egl/main/eglfallbacks.c b/src/egl/main/eglfallbacks.c index c44ec6cc835..3c3701f4ae9 100644 --- a/src/egl/main/eglfallbacks.c +++ b/src/egl/main/eglfallbacks.c @@ -91,7 +91,7 @@ _eglInitDriverFallbacks(_EGLDriver *drv) drv->API.ClientWaitSyncKHR = NULL; drv->API.WaitSyncKHR = NULL; drv->API.SignalSyncKHR = NULL; - drv->API.GetSyncAttribKHR = _eglGetSyncAttribKHR; + drv->API.GetSyncAttrib = _eglGetSyncAttrib; #ifdef EGL_MESA_drm_image drv->API.CreateDRMImageMESA = NULL; diff --git a/src/egl/main/eglsync.c b/src/egl/main/eglsync.c index 205cdc04581..3019e6e9333 100644 --- a/src/egl/main/eglsync.c +++ b/src/egl/main/eglsync.c @@ -141,8 +141,8 @@ _eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum type, EGLBoolean -_eglGetSyncAttribKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, - EGLint attribute, EGLint *value) +_eglGetSyncAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, + EGLint attribute, EGLAttrib *value) { if (!value) return _eglError(EGL_BAD_PARAMETER, "eglGetSyncAttribKHR"); diff --git a/src/egl/main/eglsync.h b/src/egl/main/eglsync.h index 4959cf07482..9b2aac8828b 100644 --- a/src/egl/main/eglsync.h +++ b/src/egl/main/eglsync.h @@ -57,8 +57,8 @@ _eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum type, extern EGLBoolean -_eglGetSyncAttribKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, - EGLint attribute, EGLint *value); +_eglGetSyncAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, + EGLint attribute, EGLAttrib *value); /** From 515f04ed6fe0c914b2cd22c7ea65db6e34c362e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 12 May 2015 20:42:05 +0200 Subject: [PATCH 449/834] egl: add eglCreateImage (v2) v2: - use calloc - return BAD_ALLOC if calloc fails Reviewed-by: Chad Versace --- 
src/egl/main/eglapi.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 96968694ac5..e4fd44e90c5 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -251,6 +251,31 @@ _eglUnlockDisplay(_EGLDisplay *dpy) } +static EGLint * +_eglConvertAttribsToInt(const EGLAttrib *attr_list) +{ + EGLint *int_attribs = NULL; + + /* Convert attributes from EGLAttrib[] to EGLint[] */ + if (attr_list) { + int i, size = 0; + + while (attr_list[size] != EGL_NONE) + size += 2; + + size += 1; /* add space for EGL_NONE */ + + int_attribs = calloc(size, sizeof(int_attribs[0])); + if (!int_attribs) + return NULL; + + for (i = 0; i < size; i++) + int_attribs[i] = attr_list[i]; + } + return int_attribs; +} + + /** * This is typically the first EGL function that an application calls. * It associates a private _EGLDisplay object to the native display. @@ -1255,6 +1280,22 @@ eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target, } +EGLImage EGLAPIENTRY +eglCreateImage(EGLDisplay dpy, EGLContext ctx, EGLenum target, + EGLClientBuffer buffer, const EGLAttrib *attr_list) +{ + EGLImage image; + EGLint *int_attribs = _eglConvertAttribsToInt(attr_list); + + if (attr_list && !int_attribs) + RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, EGL_NO_IMAGE); + + image = eglCreateImageKHR(dpy, ctx, target, buffer, int_attribs); + free(int_attribs); + return image; +} + + EGLBoolean EGLAPIENTRY eglDestroyImage(EGLDisplay dpy, EGLImage image) { @@ -1751,6 +1792,7 @@ eglGetProcAddress(const char *procname) { "eglClientWaitSync", (_EGLProc) eglClientWaitSync }, { "eglGetSyncAttrib", (_EGLProc) eglGetSyncAttrib }, { "eglWaitSync", (_EGLProc) eglWaitSync }, + { "eglCreateImage", (_EGLProc) eglCreateImage }, { "eglDestroyImage", (_EGLProc) eglDestroyImage }, #ifdef EGL_MESA_drm_display { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA }, From 820a4d402ad3891ec460882feab3801fbc646a65 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 12 May 2015 21:06:41 +0200 Subject: [PATCH 450/834] egl: add new platform functions (v2) These are just wrappers around the existing extension functions. v2: return BAD_ALLOC if _eglConvertAttribsToInt fails Reviewed-by: Chad Versace --- src/egl/main/eglapi.c | 54 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index e4fd44e90c5..b9e37c47e71 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -326,6 +326,21 @@ eglGetPlatformDisplayEXT(EGLenum platform, void *native_display, return _eglGetDisplayHandle(dpy); } +EGLDisplay EGLAPIENTRY +eglGetPlatformDisplay(EGLenum platform, void *native_display, + const EGLAttrib *attrib_list) +{ + EGLDisplay display; + EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list); + + if (attrib_list && !int_attribs) + RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, NULL); + + display = eglGetPlatformDisplayEXT(platform, native_display, int_attribs); + free(int_attribs); + return display; +} + /** * Copy the extension into the string and update the string pointer. 
*/ @@ -752,6 +767,24 @@ eglCreatePlatformWindowSurfaceEXT(EGLDisplay dpy, EGLConfig config, } +EGLSurface EGLAPIENTRY +eglCreatePlatformWindowSurface(EGLDisplay dpy, EGLConfig config, + void *native_window, + const EGLAttrib *attrib_list) +{ + EGLSurface surface; + EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list); + + if (attrib_list && !int_attribs) + RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, EGL_NO_SURFACE); + + surface = eglCreatePlatformWindowSurfaceEXT(dpy, config, native_window, + int_attribs); + free(int_attribs); + return surface; +} + + static EGLSurface _eglCreatePixmapSurfaceCommon(_EGLDisplay *disp, EGLConfig config, void *native_pixmap, const EGLint *attrib_list) @@ -805,6 +838,24 @@ eglCreatePlatformPixmapSurfaceEXT(EGLDisplay dpy, EGLConfig config, } +EGLSurface EGLAPIENTRY +eglCreatePlatformPixmapSurface(EGLDisplay dpy, EGLConfig config, + void *native_pixmap, + const EGLAttrib *attrib_list) +{ + EGLSurface surface; + EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list); + + if (attrib_list && !int_attribs) + RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, EGL_NO_SURFACE); + + surface = eglCreatePlatformPixmapSurfaceEXT(dpy, config, native_pixmap, + int_attribs); + free(int_attribs); + return surface; +} + + EGLSurface EGLAPIENTRY eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list) @@ -1794,6 +1845,9 @@ eglGetProcAddress(const char *procname) { "eglWaitSync", (_EGLProc) eglWaitSync }, { "eglCreateImage", (_EGLProc) eglCreateImage }, { "eglDestroyImage", (_EGLProc) eglDestroyImage }, + { "eglGetPlatformDisplay", (_EGLProc) eglGetPlatformDisplay }, + { "eglCreatePlatformWindowSurface", (_EGLProc) eglCreatePlatformWindowSurface }, + { "eglCreatePlatformPixmapSurface", (_EGLProc) eglCreatePlatformPixmapSurface }, #ifdef EGL_MESA_drm_display { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA }, #endif From 51c8c66e1d81d03f0db6aee0a510aa85c277053e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= 
Date: Tue, 12 May 2015 21:41:32 +0200 Subject: [PATCH 451/834] egl: return correct invalid-type error from eglCreateSync Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- src/egl/main/eglapi.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index b9e37c47e71..bc360fe9107 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1370,7 +1370,8 @@ eglDestroyImage(EGLDisplay dpy, EGLImage image) static EGLSync _eglCreateSync(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list, - const EGLAttrib *attrib_list64, EGLBoolean is64) + const EGLAttrib *attrib_list64, EGLBoolean is64, + EGLenum invalid_type_error) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLContext *ctx = _eglGetCurrentContext(); @@ -1391,18 +1392,18 @@ _eglCreateSync(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list, switch (type) { case EGL_SYNC_FENCE_KHR: if (!disp->Extensions.KHR_fence_sync) - RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR); + RETURN_EGL_ERROR(disp, invalid_type_error, EGL_NO_SYNC_KHR); break; case EGL_SYNC_REUSABLE_KHR: if (!disp->Extensions.KHR_reusable_sync) - RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR); + RETURN_EGL_ERROR(disp, invalid_type_error, EGL_NO_SYNC_KHR); break; case EGL_SYNC_CL_EVENT_KHR: if (!disp->Extensions.KHR_cl_event2) - RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR); + RETURN_EGL_ERROR(disp, invalid_type_error, EGL_NO_SYNC_KHR); break; default: - RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR); + RETURN_EGL_ERROR(disp, invalid_type_error, EGL_NO_SYNC_KHR); } sync = drv->API.CreateSyncKHR(drv, disp, type, attrib_list, attrib_list64); @@ -1415,14 +1416,24 @@ _eglCreateSync(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list, static EGLSync EGLAPIENTRY eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list) { - return _eglCreateSync(dpy, type, attrib_list, NULL, EGL_FALSE); + 
return _eglCreateSync(dpy, type, attrib_list, NULL, EGL_FALSE, + EGL_BAD_ATTRIBUTE); +} + + +static EGLSync EGLAPIENTRY +eglCreateSync64KHR(EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list) +{ + return _eglCreateSync(dpy, type, NULL, attrib_list, EGL_TRUE, + EGL_BAD_ATTRIBUTE); } EGLSync EGLAPIENTRY eglCreateSync(EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list) { - return _eglCreateSync(dpy, type, NULL, attrib_list, EGL_TRUE); + return _eglCreateSync(dpy, type, NULL, attrib_list, EGL_TRUE, + EGL_BAD_PARAMETER); } @@ -1854,7 +1865,7 @@ eglGetProcAddress(const char *procname) { "eglCreateImageKHR", (_EGLProc) eglCreateImageKHR }, { "eglDestroyImageKHR", (_EGLProc) eglDestroyImage }, { "eglCreateSyncKHR", (_EGLProc) eglCreateSyncKHR }, - { "eglCreateSync64KHR", (_EGLProc) eglCreateSync }, + { "eglCreateSync64KHR", (_EGLProc) eglCreateSync64KHR }, { "eglDestroySyncKHR", (_EGLProc) eglDestroySync }, { "eglClientWaitSyncKHR", (_EGLProc) eglClientWaitSync }, { "eglWaitSyncKHR", (_EGLProc) eglWaitSyncKHR }, From a1cb407b049bb431b0f6f21e6e376d11f136af67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 May 2015 22:18:04 +0200 Subject: [PATCH 452/834] egl: expose EGL 1.5 if all requirements are met There's no driver support yet, because EGL_KHR_gl_colorspace isn't implemented. 
Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- src/egl/main/eglapi.c | 15 +++++++++++++++ src/egl/main/egldisplay.h | 1 + 2 files changed, 16 insertions(+) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index bc360fe9107..105e919683a 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -451,6 +451,21 @@ static void _eglComputeVersion(_EGLDisplay *disp) { disp->Version = 14; + + if (disp->Extensions.KHR_fence_sync && + disp->Extensions.KHR_cl_event2 && + disp->Extensions.KHR_wait_sync && + disp->Extensions.KHR_image_base && + disp->Extensions.KHR_gl_texture_2D_image && + disp->Extensions.KHR_gl_texture_3D_image && + disp->Extensions.KHR_gl_texture_cubemap_image && + disp->Extensions.KHR_gl_renderbuffer_image && + disp->Extensions.KHR_create_context && + disp->Extensions.EXT_create_context_robustness && + disp->Extensions.KHR_get_all_proc_addresses && + disp->Extensions.KHR_gl_colorspace && + disp->Extensions.KHR_surfaceless_context) + disp->Version = 15; } /** diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index bc8cc7d6319..84cfbe19f7e 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -101,6 +101,7 @@ struct _egl_extensions EGLBoolean KHR_image_pixmap; EGLBoolean KHR_vg_parent_image; EGLBoolean KHR_get_all_proc_addresses; + EGLBoolean KHR_gl_colorspace; EGLBoolean KHR_gl_texture_2D_image; EGLBoolean KHR_gl_texture_cubemap_image; EGLBoolean KHR_gl_texture_3D_image; From 6acb61fc9c2c5f81569d17d90a480abc48ec6055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 12 May 2015 22:53:00 +0200 Subject: [PATCH 453/834] clover: clarify and fix the EGL interop error case Cc: 10.6 --- src/gallium/state_trackers/clover/api/interop.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/api/interop.cpp b/src/gallium/state_trackers/clover/api/interop.cpp index ea0c7c73c30..b96069f5167 100644 --- 
a/src/gallium/state_trackers/clover/api/interop.cpp +++ b/src/gallium/state_trackers/clover/api/interop.cpp @@ -31,7 +31,12 @@ extern "C" { PUBLIC bool opencl_dri_event_add_ref(cl_event event) { - return clRetainEvent(event) == CL_SUCCESS; + /* This should fail if the event hasn't been created by + * clEnqueueReleaseGLObjects or clEnqueueReleaseEGLObjects. + * + * TODO: implement the CL functions + */ + return false; /*return clRetainEvent(event) == CL_SUCCESS;*/ } PUBLIC bool From 77a44512d9ed56be5e53ebf09e917b5aeeba0189 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 4 Jun 2015 22:05:13 -0700 Subject: [PATCH 454/834] i965: Add buffer sizes to perf debug of fast clears When we cannot do the optimized fast clear it's important to know the buffer size since a small buffer will have much less performance impact. A follow-on patch could restrict printing the message to only certain sizes. Example: Failed to fast clear 1400x1056 depth because of scissors. Possible 5% performance win if avoided. Recommended-by: Kenneth Graunke Signed-off-by: Ben Widawsky Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_clear.c | 5 +++-- src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index 12314204803..1d4ba3cac7e 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -121,8 +121,9 @@ brw_fast_clear_depth(struct gl_context *ctx) * first. */ if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(ctx, fb)) { - perf_debug("Failed to fast clear depth due to scissor being enabled. " - "Possible 5%% performance win if avoided.\n"); + perf_debug("Failed to fast clear %dx%d depth because of scissors. 
" + "Possible 5%% performance win if avoided.\n", + mt->logical_width0, mt->logical_height0); return false; } diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index 06916e28cbd..a864143e6dc 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -466,7 +466,8 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, * linear (untiled) memory is UNDEFINED." */ if (irb->mt->tiling == I915_TILING_NONE) { - perf_debug("falling back to plain clear because buffers are untiled\n"); + perf_debug("Falling back to plain clear because %dx%d buffer is untiled\n", + irb->mt->logical_width0, irb->mt->logical_height0); clear_type = PLAIN_CLEAR; } @@ -477,7 +478,8 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, for (int i = 0; i < 4; i++) { if (_mesa_format_has_color_component(irb->mt->format, i) && !color_mask[i]) { - perf_debug("falling back to plain clear because of color mask\n"); + perf_debug("Falling back to plain clear on %dx%d buffer because of color mask\n", + irb->mt->logical_width0, irb->mt->logical_height0); clear_type = PLAIN_CLEAR; } } From b639ed2f1b170d1184c6d94c88c826c51ffc8726 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 4 Jun 2015 23:59:23 -0700 Subject: [PATCH 455/834] i965: Add gen8 fast clear perf debug In an ideal world I would just implement this instead of adding the perf debug. There are some errata involved which lead me to believe it won't be so simple as flipping a few bits. There is room to add a thing for Gen9s flexibility, but since I am actively working on that I have opted to ignore it. Example: Multi-LOD fast clear - giving up (256x128x8). 
v2: Use braces for if statements because they are multiple lines (Ken) Signed-off-by: Ben Widawsky Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 4 ++++ src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 17 +++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index a864143e6dc..c0c8dfa608d 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -340,6 +340,10 @@ is_color_fast_clear_compatible(struct brw_context *brw, const union gl_color_union *color) { if (_mesa_is_format_integer_color(format)) + if (brw->gen >= 8) { + perf_debug("Integer fast clear not enabled for (%s)", + _mesa_get_format_name(format)); + } return false; for (int i = 0; i < 4; i++) { diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 24a5c3dc666..8616c0193c8 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -198,10 +198,23 @@ intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw, return false; if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16) return false; - if (mt->first_level != 0 || mt->last_level != 0) + if (mt->first_level != 0 || mt->last_level != 0) { + if (brw->gen >= 8) { + perf_debug("Multi-LOD fast clear - giving up (%dx%dx%d).\n", + mt->logical_width0, mt->logical_height0, mt->last_level); + } + return false; - if (mt->physical_depth0 != 1) + } + if (mt->physical_depth0 != 1) { + if (brw->gen >= 8) { + perf_debug("Layered fast clear - giving up. (%dx%d%d)\n", + mt->logical_width0, mt->logical_height0, + mt->physical_depth0); + } + return false; + } /* There's no point in using an MCS buffer if the surface isn't in a * renderable format. 
From 52e5ad7bf8c731280ca4506b7d38e8c7a8e734b9 Mon Sep 17 00:00:00 2001 From: Chris Forbes Date: Wed, 3 Jun 2015 12:11:27 +1200 Subject: [PATCH 456/834] i965: Set max texture buffer size to hardware limit Previously we were leaving this at the default of 64K, which meets the spec but is too small for some real uses. The hardware can handle up to 128M. User was complaining about this on freenode ##OpenGL today. Signed-off-by: Chris Forbes Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 274a2379437..652d9a34e8f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -545,6 +545,7 @@ brw_initialize_context_constants(struct brw_context *brw) */ ctx->Const.UniformBufferOffsetAlignment = 16; ctx->Const.TextureBufferOffsetAlignment = 16; + ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024; if (brw->gen >= 6) { ctx->Const.MaxVarying = 32; From 7b8f20ec5505a25958bcd98aabe73a7ca2b6cbba Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 4 Jun 2015 17:00:17 -0700 Subject: [PATCH 457/834] prog_to_nir: Fix fragment depth writes. In the ARB_fragment_program specification, the result.depth output variable is treated as a vec4, where the fragment depth is stored in the .z component, and the other three components are undefined. This is different than GLSL, which uses a scalar value (gl_FragDepth). To make this consistent for driver backends, this patch makes prog_to_nir use a scalar output variable for FRAG_RESULT_DEPTH, moving result.depth.z into the first component. Fixes Glean's fragProg1 "Z-write test" subtest. 
Cc: mesa-stable@lists.freedesktop.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90000 Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/program/prog_to_nir.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index d3e3f15c959..d54f934247d 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -924,10 +924,23 @@ ptn_add_output_stores(struct ptn_compile *c) foreach_list_typed(nir_variable, var, node, &b->shader->outputs) { nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - store->num_components = 4; + store->num_components = glsl_get_vector_elements(var->type); store->variables[0] = nir_deref_var_create(store, c->output_vars[var->data.location]); - store->src[0].reg.reg = c->output_regs[var->data.location]; + + if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + var->data.location == FRAG_RESULT_DEPTH) { + /* result.depth has this strange convention of being the .z component of + * a vec4 with undefined .xyw components. We resolve it to a scalar, to + * match GLSL's gl_FragDepth and the expectations of most backends. 
+ */ + nir_alu_src alu_src = { NIR_SRC_INIT }; + alu_src.src = nir_src_for_reg(c->output_regs[FRAG_RESULT_DEPTH]); + alu_src.swizzle[0] = SWIZZLE_Z; + store->src[0] = nir_src_for_ssa(nir_fmov_alu(b, alu_src, 1)); + } else { + store->src[0].reg.reg = c->output_regs[var->data.location]; + } nir_instr_insert_after_cf_list(c->build.cf_node_list, &store->instr); } } @@ -1020,7 +1033,10 @@ setup_registers_and_variables(struct ptn_compile *c) reg->num_components = 4; nir_variable *var = rzalloc(shader, nir_variable); - var->type = glsl_vec4_type(); + if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) + var->type = glsl_float_type(); + else + var->type = glsl_vec4_type(); var->data.mode = nir_var_shader_out; var->name = ralloc_asprintf(var, "out_%d", i); From 56efe81ab163a0c7af15fc53821ac56c0d7641d8 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sun, 7 Jun 2015 11:13:19 +0100 Subject: [PATCH 458/834] Add release notes for the 10.5.7 release Signed-off-by: Emil Velikov (cherry picked from commit 495bcbc48cf4e7cee0f2de11c1166a1fd6eb3969) --- docs/relnotes/10.5.7.html | 102 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 docs/relnotes/10.5.7.html diff --git a/docs/relnotes/10.5.7.html b/docs/relnotes/10.5.7.html new file mode 100644 index 00000000000..49440c4c34a --- /dev/null +++ b/docs/relnotes/10.5.7.html @@ -0,0 +1,102 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 10.5.7 Release Notes / June 07, 2015

    + +

    +Mesa 10.5.7 is a bug fix release which fixes bugs found since the 10.5.6 release. +

    +

    +Mesa 10.5.7 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is only available if requested at context creation +because compatibility contexts are not supported. +

    + + +

    SHA256 checksums

    +
    +TBD
    +
    + + +

    New features

    +

    None

    + +

    Bug fixes

    + +

    This list is likely incomplete.

    + +
      + +
    • Bug 89131 - [Bisected] Graphical corruption in Weston, shows old framebuffer pieces
    • + +
    + + +

    Changes

    + +

    Ben Widawsky (1):

    +
      +
    • i965: Emit 3DSTATE_MULTISAMPLE before WM_HZ_OP (gen8+)
    • +
    + +

    Emil Velikov (4):

    +
      +
    • docs: Add sha256sums for the 10.5.6 release
    • +
    • get-pick-list.sh: Require explicit "10.5" for nominating stable patches
    • +
    • cherry-ignore: add clover build fix not applicable for 10.5
    • +
    • Update version to 10.5.7
    • +
    + +

    Ilia Mirkin (18):

    +
      +
    • nvc0/ir: set ftz when sources are floats, not just destinations
    • +
    • nv50/ir: guess that the constant offset is the starting slot of array
    • +
    • nvc0/ir: LOAD's can't be used for shader inputs
    • +
    • nvc0: a geometry shader can have up to 1024 vertices output
    • +
    • nv50/ir: avoid messing up arg1 of PFETCH
    • +
    • nv30: don't leak fragprog consts
    • +
    • nv30: avoid leaking render state and draw shaders
    • +
    • nv30: fix clip plane uploads and enable changes
    • +
    • nv30/draw: avoid leaving stale pointers in draw state
    • +
    • nv30/draw: draw expects constbuf size in bytes, not vec4 units
    • +
    • st/mesa: don't leak glsl_to_tgsi object on link failure
    • +
    • glsl: avoid leaking linked gl_shader when there's a late linker error
    • +
    • nv30/draw: fix indexed draws with swtnl path and a resource index buffer
    • +
    • nv30/draw: only use the DMA1 object (GART) if the bo is not in VRAM
    • +
    • nv30/draw: allocate vertex buffers in gart
    • +
    • nv30/draw: switch varying hookup logic to know about texcoords
    • +
    • nv30: falling back to draw path for edgeflag does no good
    • +
    • nv30: avoid doing extra work on clear and hitting unexpected states
    • +
    + +

    Jason Ekstrand (1):

    +
      +
    • i965/fs: Fix implied_mrf_writes for scratch writes
    • +
    + +

    Marek Olšák (1):

    +
      +
    • st/dri: fix postprocessing crash when there's no depth buffer
    • +
    + + +
    + + From f7db7fe6ea3d6044b4ceda0c2c477642302e3997 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sun, 7 Jun 2015 11:45:25 +0100 Subject: [PATCH 459/834] docs: Add sha256sums for the 10.5.7 release Signed-off-by: Emil Velikov (cherry picked from commit eb3a704bb0008c1d046abae31dcb0b2b980c66b1) --- docs/relnotes/10.5.7.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/10.5.7.html b/docs/relnotes/10.5.7.html index 49440c4c34a..68c8385496b 100644 --- a/docs/relnotes/10.5.7.html +++ b/docs/relnotes/10.5.7.html @@ -31,7 +31,8 @@ because compatibility contexts are not supported.

    SHA256 checksums

    -TBD
    +8f865ce497435fdf25d4e35f3b5551b2bcd5f9bc6570561183be82af20d18b82  mesa-10.5.7.tar.gz
    +04d06890cd69af8089d6ca76f40e46dcf9cacfe4a9788b32be620574d4638818  mesa-10.5.7.tar.xz
     
    From 9538902c4f0e94e57228f939489d31676c43a778 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sun, 7 Jun 2015 13:44:37 +0100 Subject: [PATCH 460/834] docs: add news item and link release notes for mesa 10.5.7 Signed-off-by: Emil Velikov --- docs/index.html | 6 ++++++ docs/relnotes.html | 1 + 2 files changed, 7 insertions(+) diff --git a/docs/index.html b/docs/index.html index 08f9eb5c1f7..a88f930c9e0 100644 --- a/docs/index.html +++ b/docs/index.html @@ -16,6 +16,12 @@

    News

    +

    June 07, 2015

    +

    +Mesa 10.5.7 is released. +This is a bug-fix release. +

    +

    May 23, 2015

    Mesa 10.5.6 is released. diff --git a/docs/relnotes.html b/docs/relnotes.html index 33a6406fa02..26d196847cb 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.

      +
    • 10.5.7 release notes
    • 10.5.6 release notes
    • 10.5.5 release notes
    • 10.5.4 release notes From 79f2acb8f89704c609dd87d969353a506e03b05e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 19 Nov 2014 15:31:24 +0900 Subject: [PATCH 461/834] r600g,radeonsi: Assert that there's enough space after flushing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák Reviewed-by: Alex Deucher --- src/gallium/drivers/radeon/r600_pipe_common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 42e681dc7d2..3def4446882 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -107,11 +107,10 @@ void r600_draw_rectangle(struct blitter_context *blitter, void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw) { - /* The number of dwords we already used in the DMA so far. */ - num_dw += ctx->rings.dma.cs->cdw; /* Flush if there's not enough space. */ - if (num_dw > RADEON_MAX_CMDBUF_DWORDS) { + if ((num_dw + ctx->rings.dma.cs->cdw) > RADEON_MAX_CMDBUF_DWORDS) { ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + assert((num_dw + ctx->rings.dma.cs->cdw) <= RADEON_MAX_CMDBUF_DWORDS); } } From 56e38edc960bf08213cdb0282838ccec3e5ea10e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 26 May 2015 16:27:15 +0900 Subject: [PATCH 462/834] radeonsi: Add CIK SDMA support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the corresponding SI support. Same as that, this is currently only enabled for one-dimensional buffer copies due to issues with multi-dimensional SDMA copies. 
Reviewed-by: Marek Olšák Reviewed-by: Alex Deucher --- src/gallium/drivers/radeonsi/Makefile.sources | 1 + src/gallium/drivers/radeonsi/cik_sdma.c | 364 ++++++++++++++++++ src/gallium/drivers/radeonsi/si_dma.c | 20 - src/gallium/drivers/radeonsi/si_pipe.h | 9 + src/gallium/drivers/radeonsi/si_state.c | 22 +- src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/sid.h | 31 ++ 7 files changed, 427 insertions(+), 21 deletions(-) create mode 100644 src/gallium/drivers/radeonsi/cik_sdma.c diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index 774dc2285c0..2876c0ae735 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -1,4 +1,5 @@ C_SOURCES := \ + cik_sdma.c \ si_blit.c \ si_commands.c \ si_compute.c \ diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c new file mode 100644 index 00000000000..86111cb86e8 --- /dev/null +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -0,0 +1,364 @@ +/* + * Copyright 2010 Jerome Glisse + * Copyright 2014,2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ + +#include "sid.h" +#include "si_pipe.h" +#include "../radeon/r600_cs.h" + +#include "util/u_format.h" + +static uint32_t cik_micro_tile_mode(struct si_screen *sscreen, unsigned tile_mode) +{ + if (sscreen->b.info.si_tile_mode_array_valid) { + uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode]; + + return G_009910_MICRO_TILE_MODE_NEW(gb_tile_mode); + } + + /* The kernel cannot return the tile mode array. Guess? */ + return V_009910_ADDR_SURF_THIN_MICRO_TILING; +} + +static void cik_sdma_do_copy_buffer(struct si_context *ctx, + struct pipe_resource *dst, + struct pipe_resource *src, + uint64_t dst_offset, + uint64_t src_offset, + uint64_t size) +{ + struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs; + unsigned i, ncopy, csize; + struct r600_resource *rdst = (struct r600_resource*)dst; + struct r600_resource *rsrc = (struct r600_resource*)src; + + dst_offset += r600_resource(dst)->gpu_address; + src_offset += r600_resource(src)->gpu_address; + + ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE; + r600_need_dma_space(&ctx->b, ncopy * 7); + + r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ, + RADEON_PRIO_MIN); + r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, + RADEON_PRIO_MIN); + + for (i = 0; i < ncopy; i++) { + csize = size < CIK_SDMA_COPY_MAX_SIZE ?
size : CIK_SDMA_COPY_MAX_SIZE; + cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, + CIK_SDMA_COPY_SUB_OPCODE_LINEAR, + 0); + cs->buf[cs->cdw++] = csize; + cs->buf[cs->cdw++] = 0; /* src/dst endian swap */ + cs->buf[cs->cdw++] = src_offset; + cs->buf[cs->cdw++] = src_offset >> 32; + cs->buf[cs->cdw++] = dst_offset; + cs->buf[cs->cdw++] = dst_offset >> 32; + dst_offset += csize; + src_offset += csize; + size -= csize; + } +} + +static void cik_sdma_copy_buffer(struct si_context *ctx, + struct pipe_resource *dst, + struct pipe_resource *src, + uint64_t dst_offset, + uint64_t src_offset, + uint64_t size) +{ + struct r600_resource *rdst = (struct r600_resource*)dst; + + /* Mark the buffer range of destination as valid (initialized), + * so that transfer_map knows it should wait for the GPU when mapping + * that range. */ + util_range_add(&rdst->valid_buffer_range, dst_offset, + dst_offset + size); + + cik_sdma_do_copy_buffer(ctx, dst, src, dst_offset, src_offset, size); +} + +static void cik_sdma_copy_tile(struct si_context *ctx, + struct pipe_resource *dst, + unsigned dst_level, + struct pipe_resource *src, + unsigned src_level, + unsigned y, + unsigned copy_height, + unsigned y_align, + unsigned pitch, + unsigned bpe) +{ + struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs; + struct si_screen *sscreen = ctx->screen; + struct r600_texture *rsrc = (struct r600_texture*)src; + struct r600_texture *rdst = (struct r600_texture*)dst; + struct r600_texture *rlinear, *rtiled; + unsigned linear_lvl, tiled_lvl; + unsigned array_mode, lbpe, pitch_tile_max, slice_tile_max, size; + unsigned ncopy, height, cheight, detile, i, src_mode, dst_mode; + unsigned sub_op, bank_h, bank_w, mt_aspect, nbanks, tile_split, mt; + uint64_t base, addr; + unsigned pipe_config, tile_mode_index; + + dst_mode = rdst->surface.level[dst_level].mode; + src_mode = rsrc->surface.level[src_level].mode; + /* downcast linear aligned to linear to simplify test */ + src_mode = src_mode == 
RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode; + dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode; + assert(dst_mode != src_mode); + assert(src_mode == RADEON_SURF_MODE_LINEAR || dst_mode == RADEON_SURF_MODE_LINEAR); + + sub_op = CIK_SDMA_COPY_SUB_OPCODE_TILED; + lbpe = util_logbase2(bpe); + pitch_tile_max = ((pitch / bpe) / 8) - 1; + + detile = dst_mode == RADEON_SURF_MODE_LINEAR; + rlinear = detile ? rdst : rsrc; + rtiled = detile ? rsrc : rdst; + linear_lvl = detile ? dst_level : src_level; + tiled_lvl = detile ? src_level : dst_level; + + assert(!util_format_is_depth_and_stencil(rtiled->resource.b.b.format)); + + array_mode = si_array_mode(rtiled->surface.level[tiled_lvl].mode); + slice_tile_max = (rtiled->surface.level[tiled_lvl].nblk_x * + rtiled->surface.level[tiled_lvl].nblk_y) / (8*8) - 1; + height = rlinear->surface.level[linear_lvl].nblk_y; + base = rtiled->surface.level[tiled_lvl].offset; + addr = rlinear->surface.level[linear_lvl].offset; + bank_h = cik_bank_wh(rtiled->surface.bankh); + bank_w = cik_bank_wh(rtiled->surface.bankw); + mt_aspect = cik_macro_tile_aspect(rtiled->surface.mtilea); + tile_split = cik_tile_split(rtiled->surface.tile_split); + tile_mode_index = si_tile_mode_index(rtiled, tiled_lvl, false); + nbanks = si_num_banks(sscreen, rtiled); + base += rtiled->resource.gpu_address; + addr += rlinear->resource.gpu_address; + + pipe_config = cik_db_pipe_config(sscreen, tile_mode_index); + mt = cik_micro_tile_mode(sscreen, tile_mode_index); + + size = (copy_height * pitch) / 4; + cheight = copy_height; + if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) { + cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch; + cheight &= ~(y_align - 1); + } + ncopy = (copy_height + cheight - 1) / cheight; + r600_need_dma_space(&ctx->b, ncopy * 12); + + r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource, + RADEON_USAGE_READ, RADEON_PRIO_MIN); + r600_context_bo_reloc(&ctx->b, 
&ctx->b.rings.dma, &rdst->resource, + RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + + copy_height = size * 4 / pitch; + for (i = 0; i < ncopy; i++) { + cheight = copy_height; + if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) { + cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch; + cheight &= ~(y_align - 1); + } + size = (cheight * pitch) / 4; + + cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, + sub_op, detile << 15); + cs->buf[cs->cdw++] = base; + cs->buf[cs->cdw++] = base >> 32; + cs->buf[cs->cdw++] = ((height - 1) << 16) | pitch_tile_max; + cs->buf[cs->cdw++] = slice_tile_max; + cs->buf[cs->cdw++] = (pipe_config << 26) | (mt_aspect << 24) | + (nbanks << 21) | (bank_h << 18) | (bank_w << 15) | + (tile_split << 11) | (mt << 8) | (array_mode << 3) | + lbpe; + cs->buf[cs->cdw++] = y << 16; /* | x */ + cs->buf[cs->cdw++] = 0; /* z */; + cs->buf[cs->cdw++] = addr & 0xfffffffc; + cs->buf[cs->cdw++] = addr >> 32; + cs->buf[cs->cdw++] = (pitch / bpe) - 1; + cs->buf[cs->cdw++] = size; + + copy_height -= cheight; + y += cheight; + } +} + +void cik_sdma_copy(struct pipe_context *ctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct si_context *sctx = (struct si_context *)ctx; + struct r600_texture *rsrc = (struct r600_texture*)src; + struct r600_texture *rdst = (struct r600_texture*)dst; + unsigned dst_pitch, src_pitch, bpe, dst_mode, src_mode; + unsigned src_w, dst_w; + unsigned src_x, src_y; + unsigned copy_height, y_align; + unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; + + if (sctx->b.rings.dma.cs == NULL) { + goto fallback; + } + + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + cik_sdma_copy_buffer(sctx, dst, src, dst_x, src_box->x, src_box->width); + return; + } + + /* Before re-enabling this, please make sure you can hit all newly + * enabled paths in your testing, preferably with both piglit 
(in + * particular the streaming-texture-leak test) and real world apps + * (e.g. the UE4 Elemental demo). + */ + goto fallback; + + if (src->format != dst->format || + rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 || + rdst->dirty_level_mask & (1 << dst_level)) { + goto fallback; + } + + if (rsrc->dirty_level_mask & (1 << src_level)) { + if (rsrc->htile_buffer) + goto fallback; + + ctx->flush_resource(ctx, src); + } + + src_x = util_format_get_nblocksx(src->format, src_box->x); + dst_x = util_format_get_nblocksx(src->format, dst_x); + src_y = util_format_get_nblocksy(src->format, src_box->y); + dst_y = util_format_get_nblocksy(src->format, dst_y); + + dst_pitch = rdst->surface.level[dst_level].pitch_bytes; + src_pitch = rsrc->surface.level[src_level].pitch_bytes; + src_w = rsrc->surface.level[src_level].npix_x; + dst_w = rdst->surface.level[dst_level].npix_x; + + if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w || + src_box->width != src_w || + rsrc->surface.level[src_level].nblk_y != + rdst->surface.level[dst_level].nblk_y) { + /* FIXME CIK can do partial blit */ + goto fallback; + } + + bpe = rdst->surface.bpe; + copy_height = src_box->height / rsrc->surface.blk_h; + dst_mode = rdst->surface.level[dst_level].mode; + src_mode = rsrc->surface.level[src_level].mode; + /* downcast linear aligned to linear to simplify test */ + src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode; + dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode; + + /* Dimensions must be aligned to (macro)tiles */ + switch (src_mode == RADEON_SURF_MODE_LINEAR ? 
dst_mode : src_mode) { + case RADEON_SURF_MODE_1D: + if ((src_x % 8) || (src_y % 8) || (dst_x % 8) || (dst_y % 8) || + (copy_height % 8)) + goto fallback; + y_align = 8; + break; + case RADEON_SURF_MODE_2D: { + unsigned mtilew, mtileh, num_banks; + + switch (si_num_banks(sctx->screen, rsrc)) { + case V_02803C_ADDR_SURF_2_BANK: + default: + num_banks = 2; + break; + case V_02803C_ADDR_SURF_4_BANK: + num_banks = 4; + break; + case V_02803C_ADDR_SURF_8_BANK: + num_banks = 8; + break; + case V_02803C_ADDR_SURF_16_BANK: + num_banks = 16; + break; + } + + mtilew = (8 * rsrc->surface.bankw * + sctx->screen->b.tiling_info.num_channels) * + rsrc->surface.mtilea; + assert(!(mtilew & (mtilew - 1))); + mtileh = (8 * rsrc->surface.bankh * num_banks) / + rsrc->surface.mtilea; + assert(!(mtileh & (mtileh - 1))); + + if ((src_x & (mtilew - 1)) || (src_y & (mtileh - 1)) || + (dst_x & (mtilew - 1)) || (dst_y & (mtileh - 1)) || + (copy_height & (mtileh - 1))) + goto fallback; + + y_align = mtileh; + break; + } + default: + y_align = 1; + } + + if (src_mode == dst_mode) { + uint64_t dst_offset, src_offset; + unsigned src_h, dst_h; + + src_h = rsrc->surface.level[src_level].npix_y; + dst_h = rdst->surface.level[dst_level].npix_y; + + if (src_box->depth > 1 && + (src_y || dst_y || src_h != dst_h || src_box->height != src_h)) + goto fallback; + + /* simple dma blit would do NOTE code here assume : + * dst_pitch == src_pitch + */ + src_offset= rsrc->surface.level[src_level].offset; + src_offset += rsrc->surface.level[src_level].slice_size * src_box->z; + src_offset += src_y * src_pitch + src_x * bpe; + dst_offset = rdst->surface.level[dst_level].offset; + dst_offset += rdst->surface.level[dst_level].slice_size * dst_z; + dst_offset += dst_y * dst_pitch + dst_x * bpe; + cik_sdma_do_copy_buffer(sctx, dst, src, dst_offset, src_offset, + src_box->depth * + rsrc->surface.level[src_level].slice_size); + } else { + if (dst_y != src_y || src_box->depth > 1 || src_box->z || dst_z) + goto fallback; 
+ + cik_sdma_copy_tile(sctx, dst, dst_level, src, src_level, + src_y, copy_height, y_align, dst_pitch, bpe); + } + return; + +fallback: + si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); +} diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index db523eef318..7a0076e7aa9 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -30,21 +30,6 @@ #include "util/u_format.h" -static unsigned si_array_mode(unsigned mode) -{ - switch (mode) { - case RADEON_SURF_MODE_LINEAR_ALIGNED: - return V_009910_ARRAY_LINEAR_ALIGNED; - case RADEON_SURF_MODE_1D: - return V_009910_ARRAY_1D_TILED_THIN1; - case RADEON_SURF_MODE_2D: - return V_009910_ARRAY_2D_TILED_THIN1; - default: - case RADEON_SURF_MODE_LINEAR: - return V_009910_ARRAY_LINEAR_GENERAL; - } -} - static uint32_t si_micro_tile_mode(struct si_screen *sscreen, unsigned tile_mode) { if (sscreen->b.info.si_tile_mode_array_valid) { @@ -240,11 +225,6 @@ void si_dma_copy(struct pipe_context *ctx, goto fallback; } - /* TODO: Implement DMA copy for CIK */ - if (sctx->b.chip_class >= CIK) { - goto fallback; - } - if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { si_dma_copy_buffer(sctx, dst, src, dst_x, src_box->x, src_box->width); return; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index f98c7a83744..2d67342f160 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -237,6 +237,15 @@ struct si_context { unsigned spi_tmpring_size; }; +/* cik_sdma.c */ +void cik_sdma_copy(struct pipe_context *ctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box); + /* si_blit.c */ void si_init_blit_functions(struct si_context *sctx); void si_flush_depth_textures(struct si_context *sctx, diff 
--git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index d1b3ca2481a..6c18836d189 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -44,6 +44,21 @@ static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem, *list_elem = atom; } +unsigned si_array_mode(unsigned mode) +{ + switch (mode) { + case RADEON_SURF_MODE_LINEAR_ALIGNED: + return V_009910_ARRAY_LINEAR_ALIGNED; + case RADEON_SURF_MODE_1D: + return V_009910_ARRAY_1D_TILED_THIN1; + case RADEON_SURF_MODE_2D: + return V_009910_ARRAY_2D_TILED_THIN1; + default: + case RADEON_SURF_MODE_LINEAR: + return V_009910_ARRAY_LINEAR_GENERAL; + } +} + uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) { if (sscreen->b.chip_class == CIK && @@ -2906,11 +2921,16 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; sctx->b.b.set_min_samples = si_set_min_samples; - sctx->b.dma_copy = si_dma_copy; sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; sctx->b.b.draw_vbo = si_draw_vbo; + + if (sctx->b.chip_class >= CIK) { + sctx->b.dma_copy = cik_sdma_copy; + } else { + sctx->b.dma_copy = si_dma_copy; + } } static void diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 2f8a943846a..5e68b162137 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -261,6 +261,7 @@ unsigned cik_bank_wh(unsigned bankwh); unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode); unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect); unsigned cik_tile_split(unsigned tile_split); +unsigned si_array_mode(unsigned mode); uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex); unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil); diff 
--git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index afe011b15c7..35d5ee232a0 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -4516,6 +4516,13 @@ #define V_009910_ADDR_SURF_8_BANK 0x02 #define V_009910_ADDR_SURF_16_BANK 0x03 /* CIK */ +#define S_009910_MICRO_TILE_MODE_NEW(x) (((x) & 0x07) << 22) +#define G_009910_MICRO_TILE_MODE_NEW(x) (((x) >> 22) & 0x07) +#define C_009910_MICRO_TILE_MODE_NEW(x) 0xFE3FFFFF +#define V_009910_ADDR_SURF_DISPLAY_MICRO_TILING 0x00 +#define V_009910_ADDR_SURF_THIN_MICRO_TILING 0x01 +#define V_009910_ADDR_SURF_DEPTH_MICRO_TILING 0x02 +#define V_009910_ADDR_SURF_ROTATED_MICRO_TILING 0x03 #define R_00B01C_SPI_SHADER_PGM_RSRC3_PS 0x00B01C #define S_00B01C_CU_EN(x) (((x) & 0xFFFF) << 0) #define G_00B01C_CU_EN(x) (((x) >> 0) & 0xFFFF) @@ -8696,5 +8703,29 @@ #define SI_DMA_PACKET_CONSTANT_FILL 0xd #define SI_DMA_PACKET_NOP 0xf +/* CIK async DMA packets */ +#define CIK_SDMA_PACKET(op, sub_op, n) ((((n) & 0xFFFF) << 16) | \ + (((sub_op) & 0xFF) << 8) | \ + (((op) & 0xFF) << 0)) +/* CIK async DMA packet types */ +#define CIK_SDMA_OPCODE_NOP 0x0 +#define CIK_SDMA_OPCODE_COPY 0x1 +#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR 0x0 +#define CIK_SDMA_COPY_SUB_OPCODE_TILED 0x1 +#define CIK_SDMA_COPY_SUB_OPCODE_SOA 0x3 +#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4 +#define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 0x5 +#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 0x6 +#define CIK_SDMA_OPCODE_WRITE 0x2 +#define SDMA_WRITE_SUB_OPCODE_LINEAR 0x0 +#define SDMA_WRTIE_SUB_OPCODE_TILED 0x1 +#define CIK_SDMA_OPCODE_INDIRECT_BUFFER 0x4 +#define CIK_SDMA_PACKET_FENCE 0x5 +#define CIK_SDMA_PACKET_TRAP 0x6 +#define CIK_SDMA_PACKET_SEMAPHORE 0x7 +#define CIK_SDMA_PACKET_CONSTANT_FILL 0xb +#define CIK_SDMA_PACKET_SRBM_WRITE 0xe +#define CIK_SDMA_COPY_MAX_SIZE 0x1fffff + #endif /* _SID_H */ From 184e4de3a126fa21945fe59f68b8a29977919fc4 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: 
Fri, 5 Jun 2015 15:03:19 +0300 Subject: [PATCH 463/834] main/version: make sure all the output variables get set in get_gl_override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes 2 warnings in gcc 5.1. Reviewed-by: Brian Paul Reviewed-by: Marek Olšák Signed-off-by: Martin Peres --- src/mesa/main/version.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 409e5ae3cba..60c76040e2a 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -63,7 +63,7 @@ get_gl_override(gl_api api, int *version, bool *fwd_context, static bool compat_suffix = false; if (api == API_OPENGLES) - return; + goto exit; if (override_version < 0) { override_version = 0; @@ -93,6 +93,7 @@ get_gl_override(gl_api api, int *version, bool *fwd_context, } } +exit: *version = override_version; *fwd_context = fc_suffix; *compat_context = compat_suffix; From 8614b9e489e65bb672ab16053d30ce8708856214 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Fri, 5 Jun 2015 15:19:01 +0300 Subject: [PATCH 464/834] softpipe/query: force parenthesis around a logical not This makes GCC5 happy. 
Reviewed-by: Brian Paul Signed-off-by: Martin Peres --- src/gallium/drivers/softpipe/sp_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index e77387082bc..76105b4c0ec 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -277,7 +277,7 @@ softpipe_check_render_cond(struct softpipe_context *sp) b = pipe->get_query_result(pipe, sp->render_cond_query, wait, (void*)&result); if (b) - return (!result == sp->render_cond_cond); + return (!result) == sp->render_cond_cond; else return TRUE; } From 8da79b8378ae87474d8c47ad955e4833edf98359 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Jun 2015 13:49:08 +0100 Subject: [PATCH 465/834] i965: Fix HW blitter pitch limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BLT pitch is specified in bytes for linear surfaces and in dwords for tiled surfaces. In both cases the programmable limit is 32,767, so adjust the check to compensate for the effect of tiling. v2: Tweak whitespace for functions (Kenneth) Signed-off-by: Chris Wilson Cc: Kristian Høgsberg Cc: Kenneth Graunke Reviewed-by: Kenneth Graunke Cc: mesa-stable@lists.freedesktop.org --- src/mesa/drivers/dri/i965/intel_blit.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 7680a402975..aae0d2502d7 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -130,6 +130,15 @@ set_blitter_tiling(struct brw_context *brw, ADVANCE_BATCH(); \ } while (0) +static int +blt_pitch(struct intel_mipmap_tree *mt) +{ + int pitch = mt->pitch; + if (mt->tiling) + pitch /= 4; + return pitch; +} + /** * Implements a rectangular block transfer (blit) of pixels between two * miptrees.
@@ -197,14 +206,14 @@ intel_miptree_blit(struct brw_context *brw, * * Furthermore, intelEmitCopyBlit (which is called below) uses a signed * 16-bit integer to represent buffer pitch, so it can only handle buffer - * pitches < 32k. + * pitches < 32k. However, the pitch is measured in bytes for linear buffers + * and dwords for tiled buffers. * * As a result of these two limitations, we can only use the blitter to do - * this copy when the miptree's pitch is less than 32k. + * this copy when the miptree's pitch is less than 32k linear or 128k tiled. */ - if (src_mt->pitch >= 32768 || - dst_mt->pitch >= 32768) { - perf_debug("Falling back due to >=32k pitch\n"); + if (blt_pitch(src_mt) >= 32768 || blt_pitch(dst_mt) >= 32768) { + perf_debug("Falling back due to >= 32k/128k pitch\n"); return false; } From c2d0606827412b710dcaed80268fc665de8c9c5d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Jun 2015 14:33:36 +0100 Subject: [PATCH 466/834] i915: Blit RGBX<->RGBA drawpixels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The blitter already has code to accommodate filling in the alpha channel for BGRX destination formats, so expand this to also allow filling the alpha channel in RGBX formats. More importantly for the next patch is moving the test into its own function for the purpose of exporting the check to the callers.
v2: Fix alpha expansion as spotted by Alexander with the fix suggested by Kenneth Signed-off-by: Chris Wilson Cc: Jason Ekstrand Cc: Alexander Monakov Cc: Kristian Høgsberg Cc: Kenneth Graunke Reviewed-by Kenneth Graunke Cc: mesa-stable@lists.freedesktop.org --- src/mesa/drivers/dri/i965/intel_blit.c | 36 +++++++++++++++++++++----- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index aae0d2502d7..059165e6dba 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -139,6 +139,31 @@ blt_pitch(struct intel_mipmap_tree *mt) return pitch; } +static bool +blt_compatible_formats(mesa_format src, mesa_format dst) +{ + /* The BLT doesn't handle sRGB conversion */ + assert(src == _mesa_get_srgb_format_linear(src)); + assert(dst == _mesa_get_srgb_format_linear(dst)); + + /* No swizzle or format conversions possible, except... */ + if (src == dst) + return true; + + /* ...we can either discard the alpha channel when going from A->X, + * or we can fill the alpha channel with 0xff when going from X->A + */ + if (src == MESA_FORMAT_B8G8R8A8_UNORM || src == MESA_FORMAT_B8G8R8X8_UNORM) + return (dst == MESA_FORMAT_B8G8R8A8_UNORM || + dst == MESA_FORMAT_B8G8R8X8_UNORM); + + if (src == MESA_FORMAT_R8G8B8A8_UNORM || src == MESA_FORMAT_R8G8B8X8_UNORM) + return (dst == MESA_FORMAT_R8G8B8A8_UNORM || + dst == MESA_FORMAT_R8G8B8X8_UNORM); + + return false; +} + /** * Implements a rectangular block transfer (blit) of pixels between two * miptrees. @@ -181,11 +206,7 @@ intel_miptree_blit(struct brw_context *brw, * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A * channel to 1.0 at the end. 
*/ - if (src_format != dst_format && - ((src_format != MESA_FORMAT_B8G8R8A8_UNORM && - src_format != MESA_FORMAT_B8G8R8X8_UNORM) || - (dst_format != MESA_FORMAT_B8G8R8A8_UNORM && - dst_format != MESA_FORMAT_B8G8R8X8_UNORM))) { + if (!blt_compatible_formats(src_format, dst_format)) { perf_debug("%s: Can't use hardware blitter from %s to %s, " "falling back.\n", __func__, _mesa_get_format_name(src_format), @@ -270,8 +291,9 @@ intel_miptree_blit(struct brw_context *brw, return false; } - if (src_mt->format == MESA_FORMAT_B8G8R8X8_UNORM && - dst_mt->format == MESA_FORMAT_B8G8R8A8_UNORM) { + /* XXX This could be done in a single pass using XY_FULL_MONO_PATTERN_BLT */ + if (_mesa_get_format_bits(src_format, GL_ALPHA_BITS) == 0 && + _mesa_get_format_bits(dst_format, GL_ALPHA_BITS) > 0) { intel_miptree_set_alpha_to_one(brw, dst_mt, dst_x, dst_y, width, height); From 922c0c9fd526ce19b87bc74a3159dec7705c1de1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Jun 2015 14:45:18 +0100 Subject: [PATCH 467/834] i965: Export format comparison for blitting between miptrees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the introduction of commit 536003c11e4cb1172c540932ce3cce06f03bf44e Author: Boyan Ding Date: Wed Mar 25 19:36:54 2015 +0800 i965: Add XRGB8888 format to intel_screen_make_configs winsys buffers no longer have an alpha channel. This causes _mesa_format_matches_format_and_type() to reject previously working BGRA uploads from using the BLT fast path. Instead of using the generic routine for matching formats exactly, export the slightly more relaxed check from intel_miptree_blit() which importantly allows the blitter routine to apply a small number of format conversions. 
References: https://bugs.freedesktop.org/show_bug.cgi?id=90839 Signed-off-by: Chris Wilson Cc: Jason Ekstrand Cc: Alexander Monakov Cc: Kristian Høgsberg Cc: Kenneth Graunke Reviewed-by: Kenneth Graunke Cc: mesa-stable@lists.freedesktop.org --- src/mesa/drivers/dri/i965/intel_blit.c | 6 +++--- src/mesa/drivers/dri/i965/intel_blit.h | 2 ++ src/mesa/drivers/dri/i965/intel_pixel_draw.c | 11 +++++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 059165e6dba..5afc771dea8 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -139,8 +139,8 @@ blt_pitch(struct intel_mipmap_tree *mt) return pitch; } -static bool -blt_compatible_formats(mesa_format src, mesa_format dst) +bool +intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst) { /* The BLT doesn't handle sRGB conversion */ assert(src == _mesa_get_srgb_format_linear(src)); @@ -206,7 +206,7 @@ intel_miptree_blit(struct brw_context *brw, * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A * channel to 1.0 at the end. 
*/ - if (!blt_compatible_formats(src_format, dst_format)) { + if (!intel_miptree_blit_compatible_formats(src_format, dst_format)) { perf_debug("%s: Can't use hardware blitter from %s to %s, " "falling back.\n", __func__, _mesa_get_format_name(src_format), diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h index f563939fdd9..2287c379c4e 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.h +++ b/src/mesa/drivers/dri/i965/intel_blit.h @@ -46,6 +46,8 @@ intelEmitCopyBlit(struct brw_context *brw, GLshort w, GLshort h, GLenum logicop ); +bool intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst); + bool intel_miptree_blit(struct brw_context *brw, struct intel_mipmap_tree *src_mt, int src_level, int src_slice, diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c index 4ecefc8cf54..d68cbb6e401 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c @@ -28,6 +28,7 @@ #include "main/glheader.h" #include "main/enums.h" #include "main/image.h" +#include "main/glformats.h" #include "main/mtypes.h" #include "main/condrender.h" #include "main/fbobject.h" @@ -76,8 +77,14 @@ do_blit_drawpixels(struct gl_context * ctx, struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; struct intel_renderbuffer *irb = intel_renderbuffer(rb); - if (!_mesa_format_matches_format_and_type(irb->mt->format, format, type, - false)) { + mesa_format src_format = _mesa_format_from_format_and_type(format, type); + mesa_format dst_format = irb->mt->format; + + /* We can safely discard sRGB encode/decode for the DrawPixels interface */ + src_format = _mesa_get_srgb_format_linear(src_format); + dst_format = _mesa_get_srgb_format_linear(dst_format); + + if (!intel_miptree_blit_compatible_formats(src_format, dst_format)) { DBG("%s: bad format for blit\n", __func__); return false; } From 4f2f5c8d81673473dce8bee3d66b524b4908a823 Mon Sep 17 
00:00:00 2001 From: Ben Widawsky Date: Mon, 22 Dec 2014 19:29:24 -0800 Subject: [PATCH 468/834] i965: Disallow saturation for MACH operations. Reviewed-by: Matt Turner Signed-off-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_shader.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 6222d5258de..76285f273e4 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -949,7 +949,6 @@ backend_instruction::can_do_saturate() const case BRW_OPCODE_LINE: case BRW_OPCODE_LRP: case BRW_OPCODE_MAC: - case BRW_OPCODE_MACH: case BRW_OPCODE_MAD: case BRW_OPCODE_MATH: case BRW_OPCODE_MOV: From 2cbe730ac53a8510d0decde20a42f1acd51a93a9 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:47 -0700 Subject: [PATCH 469/834] i965: Choose tiling in brw_miptree_layout() function This refactoring is required by later patches in this series. Signed-off-by: Anuj Phogat Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 16 ++++++- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 47 ++++++++++--------- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 14 +++++- 3 files changed, 52 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 72b02a2cf0a..4e79cf56331 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -459,7 +459,10 @@ brw_miptree_layout_texture_3d(struct brw_context *brw, } void -brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) +brw_miptree_layout(struct brw_context *brw, + bool for_bo, + enum intel_miptree_tiling_mode requested, + struct intel_mipmap_tree *mt) { bool multisampled = mt->num_samples > 1; bool gen6_hiz_or_stencil = false; @@ -543,6 +546,11 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) DBG("%s: 
%dx%dx%d\n", __func__, mt->total_width, mt->total_height, mt->cpp); + if (!mt->total_width || !mt->total_height) { + intel_miptree_release(&mt); + return; + } + /* On Gen9+ the alignment values are expressed in multiples of the block * size */ @@ -552,5 +560,11 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) mt->align_w /= i; mt->align_h /= j; } + + if (!for_bo) + mt->tiling = intel_miptree_choose_tiling(brw, mt->format, + mt->logical_width0, + mt->num_samples, + requested, mt); } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 8616c0193c8..ef2f932272b 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -259,6 +259,7 @@ intel_miptree_create_layout(struct brw_context *brw, GLuint depth0, bool for_bo, GLuint num_samples, + enum intel_miptree_tiling_mode requested, bool force_all_slices_at_each_lod, bool disable_aux_buffers) { @@ -473,7 +474,7 @@ intel_miptree_create_layout(struct brw_context *brw, if (force_all_slices_at_each_lod) mt->array_layout = ALL_SLICES_AT_EACH_LOD; - brw_miptree_layout(brw, mt); + brw_miptree_layout(brw, for_bo, requested, mt); if (mt->disable_aux_buffers) assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS); @@ -484,7 +485,7 @@ intel_miptree_create_layout(struct brw_context *brw, /** * \brief Helper function for intel_miptree_create(). 
*/ -static uint32_t +uint32_t intel_miptree_choose_tiling(struct brw_context *brw, mesa_format format, uint32_t width0, @@ -628,14 +629,14 @@ intel_lower_compressed_format(struct brw_context *brw, mesa_format format) struct intel_mipmap_tree * intel_miptree_create(struct brw_context *brw, - GLenum target, - mesa_format format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - bool expect_accelerated_upload, + GLenum target, + mesa_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + bool expect_accelerated_upload, GLuint num_samples, enum intel_miptree_tiling_mode requested_tiling, bool force_all_slices_at_each_lod) @@ -653,15 +654,12 @@ intel_miptree_create(struct brw_context *brw, first_level, last_level, width0, height0, depth0, false, num_samples, + requested_tiling, force_all_slices_at_each_lod, false /*disable_aux_buffers*/); - /* - * pitch == 0 || height == 0 indicates the null texture - */ - if (!mt || !mt->total_width || !mt->total_height) { - intel_miptree_release(&mt); + + if (!mt) return NULL; - } total_width = mt->total_width; total_height = mt->total_height; @@ -672,16 +670,11 @@ intel_miptree_create(struct brw_context *brw, total_height = ALIGN(total_height, 64); } - uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0, - num_samples, requested_tiling, - mt); bool y_or_x = false; - if (tiling == (I915_TILING_Y | I915_TILING_X)) { + if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) { y_or_x = true; mt->tiling = I915_TILING_Y; - } else { - mt->tiling = tiling; } unsigned long pitch; @@ -767,10 +760,18 @@ intel_miptree_create_for_bo(struct brw_context *brw, target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; + /* 'requested' parameter of intel_miptree_create_layout() is relevant + * only for non bo miptree. Tiling for bo is already computed above. 
+ * So, the tiling requested (INTEL_MIPTREE_TILING_ANY) below is + * just a place holder and will not make any change to the miptree + * tiling format. + */ mt = intel_miptree_create_layout(brw, target, format, 0, 0, width, height, depth, - true, 0, false, + true, 0, + INTEL_MIPTREE_TILING_ANY, + false, disable_aux_buffers); if (!mt) return NULL; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 8b42e4adb79..1d51546abac 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -753,7 +753,11 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw, const struct intel_mipmap_tree *mt, unsigned level); -void brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt); +void +brw_miptree_layout(struct brw_context *brw, + bool for_bo, + enum intel_miptree_tiling_mode requested, + struct intel_mipmap_tree *mt); void *intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt); @@ -780,6 +784,14 @@ intel_miptree_unmap(struct brw_context *brw, unsigned int level, unsigned int slice); +uint32_t +intel_miptree_choose_tiling(struct brw_context *brw, + mesa_format format, + uint32_t width0, + uint32_t num_samples, + enum intel_miptree_tiling_mode requested, + struct intel_mipmap_tree *mt); + void intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int layer, enum gen6_hiz_op op); From 9edac38f2a7aaa55bc4f33eb268155ba76908925 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:47 -0700 Subject: [PATCH 470/834] i965: Move intel_miptree_choose_tiling() to brw_tex_layout.c and change the name to brw_miptree_choose_tiling(). V3: Remove redundant function parameters. 
(Topi) Signed-off-by: Anuj Phogat Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 107 +++++++++++++++++- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 104 ----------------- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 8 -- 3 files changed, 103 insertions(+), 116 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 4e79cf56331..c77c0cefce5 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -458,6 +458,108 @@ brw_miptree_layout_texture_3d(struct brw_context *brw, align_cube(mt); } +/** + * \brief Helper function for intel_miptree_create(). + */ +static uint32_t +brw_miptree_choose_tiling(struct brw_context *brw, + enum intel_miptree_tiling_mode requested, + const struct intel_mipmap_tree *mt) +{ + if (mt->format == MESA_FORMAT_S_UINT8) { + /* The stencil buffer is W tiled. However, we request from the kernel a + * non-tiled buffer because the GTT is incapable of W fencing. + */ + return I915_TILING_NONE; + } + + /* Some usages may want only one type of tiling, like depth miptrees (Y + * tiled), or temporary BOs for uploading data once (linear). + */ + switch (requested) { + case INTEL_MIPTREE_TILING_ANY: + break; + case INTEL_MIPTREE_TILING_Y: + return I915_TILING_Y; + case INTEL_MIPTREE_TILING_NONE: + return I915_TILING_NONE; + } + + if (mt->num_samples > 1) { + /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled + * Surface"): + * + * [DevSNB+]: For multi-sample render targets, this field must be + * 1. MSRTs can only be tiled. + * + * Our usual reason for preferring X tiling (fast blits using the + * blitting engine) doesn't apply to MSAA, since we'll generally be + * downsampling or upsampling when blitting between the MSAA buffer + * and another buffer, and the blitting engine doesn't support that. + * So use Y tiling, since it makes better use of the cache. 
+ */ + return I915_TILING_Y; + } + + GLenum base_format = _mesa_get_format_base_format(mt->format); + if (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL_EXT) + return I915_TILING_Y; + + /* 1D textures (and 1D array textures) don't get any benefit from tiling, + * in fact it leads to a less efficient use of memory space and bandwidth + * due to tile alignment. + */ + if (mt->logical_height0 == 1) + return I915_TILING_NONE; + + int minimum_pitch = mt->total_width * mt->cpp; + + /* If the width is much smaller than a tile, don't bother tiling. */ + if (minimum_pitch < 64) + return I915_TILING_NONE; + + if (ALIGN(minimum_pitch, 512) >= 32768 || + mt->total_width >= 32768 || mt->total_height >= 32768) { + perf_debug("%dx%d miptree too large to blit, falling back to untiled", + mt->total_width, mt->total_height); + return I915_TILING_NONE; + } + + /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */ + if (brw->gen < 6) + return I915_TILING_X; + + /* From the Sandybridge PRM, Volume 1, Part 2, page 32: + * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX + * or Linear." + * 128 bits per pixel translates to 16 bytes per pixel. This is necessary + * all the way back to 965, but is permitted on Gen7+. + */ + if (brw->gen < 7 && mt->cpp >= 16) + return I915_TILING_X; + + /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most + * messages), on p64, under the heading "Surface Vertical Alignment": + * + * This field must be set to VALIGN_4 for all tiled Y Render Target + * surfaces. + * + * So if the surface is renderable and uses a vertical alignment of 2, + * force it to be X tiled. This is somewhat conservative (it's possible + * that the client won't ever render to this surface), but it's difficult + * to know that ahead of time. And besides, since we use a vertical + * alignment of 4 as often as we can, this shouldn't happen very often. 
+ */ + if (brw->gen == 7 && mt->align_h == 2 && + brw->format_supported_as_render_target[mt->format]) { + return I915_TILING_X; + } + + return I915_TILING_Y | I915_TILING_X; +} + + void brw_miptree_layout(struct brw_context *brw, bool for_bo, @@ -562,9 +664,6 @@ brw_miptree_layout(struct brw_context *brw, } if (!for_bo) - mt->tiling = intel_miptree_choose_tiling(brw, mt->format, - mt->logical_width0, - mt->num_samples, - requested, mt); + mt->tiling = brw_miptree_choose_tiling(brw, requested, mt); } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index ef2f932272b..615cbfb7158 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -482,110 +482,6 @@ intel_miptree_create_layout(struct brw_context *brw, return mt; } -/** - * \brief Helper function for intel_miptree_create(). - */ -uint32_t -intel_miptree_choose_tiling(struct brw_context *brw, - mesa_format format, - uint32_t width0, - uint32_t num_samples, - enum intel_miptree_tiling_mode requested, - struct intel_mipmap_tree *mt) -{ - if (format == MESA_FORMAT_S_UINT8) { - /* The stencil buffer is W tiled. However, we request from the kernel a - * non-tiled buffer because the GTT is incapable of W fencing. - */ - return I915_TILING_NONE; - } - - /* Some usages may want only one type of tiling, like depth miptrees (Y - * tiled), or temporary BOs for uploading data once (linear). - */ - switch (requested) { - case INTEL_MIPTREE_TILING_ANY: - break; - case INTEL_MIPTREE_TILING_Y: - return I915_TILING_Y; - case INTEL_MIPTREE_TILING_NONE: - return I915_TILING_NONE; - } - - if (num_samples > 1) { - /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled - * Surface"): - * - * [DevSNB+]: For multi-sample render targets, this field must be - * 1. MSRTs can only be tiled. 
- * - * Our usual reason for preferring X tiling (fast blits using the - * blitting engine) doesn't apply to MSAA, since we'll generally be - * downsampling or upsampling when blitting between the MSAA buffer - * and another buffer, and the blitting engine doesn't support that. - * So use Y tiling, since it makes better use of the cache. - */ - return I915_TILING_Y; - } - - GLenum base_format = _mesa_get_format_base_format(format); - if (base_format == GL_DEPTH_COMPONENT || - base_format == GL_DEPTH_STENCIL_EXT) - return I915_TILING_Y; - - /* 1D textures (and 1D array textures) don't get any benefit from tiling, - * in fact it leads to a less efficient use of memory space and bandwidth - * due to tile alignment. - */ - if (mt->logical_height0 == 1) - return I915_TILING_NONE; - - int minimum_pitch = mt->total_width * mt->cpp; - - /* If the width is much smaller than a tile, don't bother tiling. */ - if (minimum_pitch < 64) - return I915_TILING_NONE; - - if (ALIGN(minimum_pitch, 512) >= 32768 || - mt->total_width >= 32768 || mt->total_height >= 32768) { - perf_debug("%dx%d miptree too large to blit, falling back to untiled", - mt->total_width, mt->total_height); - return I915_TILING_NONE; - } - - /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */ - if (brw->gen < 6) - return I915_TILING_X; - - /* From the Sandybridge PRM, Volume 1, Part 2, page 32: - * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX - * or Linear." - * 128 bits per pixel translates to 16 bytes per pixel. This is necessary - * all the way back to 965, but is permitted on Gen7+. - */ - if (brw->gen < 7 && mt->cpp >= 16) - return I915_TILING_X; - - /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most - * messages), on p64, under the heading "Surface Vertical Alignment": - * - * This field must be set to VALIGN_4 for all tiled Y Render Target - * surfaces. 
- * - * So if the surface is renderable and uses a vertical alignment of 2, - * force it to be X tiled. This is somewhat conservative (it's possible - * that the client won't ever render to this surface), but it's difficult - * to know that ahead of time. And besides, since we use a vertical - * alignment of 4 as often as we can, this shouldn't happen very often. - */ - if (brw->gen == 7 && mt->align_h == 2 && - brw->format_supported_as_render_target[format]) { - return I915_TILING_X; - } - - return I915_TILING_Y | I915_TILING_X; -} - /** * Choose an appropriate uncompressed format for a requested diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 1d51546abac..0db6b44b9fd 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -784,14 +784,6 @@ intel_miptree_unmap(struct brw_context *brw, unsigned int level, unsigned int slice); -uint32_t -intel_miptree_choose_tiling(struct brw_context *brw, - mesa_format format, - uint32_t width0, - uint32_t num_samples, - enum intel_miptree_tiling_mode requested, - struct intel_mipmap_tree *mt); - void intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int layer, enum gen6_hiz_op op); From ef6b9985ea6b60a562daed3a9ed3be0f91f21e01 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:47 -0700 Subject: [PATCH 471/834] i965: Pass miptree pointer as function parameter in intel_vertical_texture_alignment_unit Signed-off-by: Anuj Phogat Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index c77c0cefce5..ec7c6c4daa0 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -99,7 +99,7 @@ 
intel_horizontal_texture_alignment_unit(struct brw_context *brw, static unsigned int intel_vertical_texture_alignment_unit(struct brw_context *brw, - mesa_format format, bool multisampled) + const struct intel_mipmap_tree *mt) { /** * From the "Alignment Unit Size" section of various specs, namely: @@ -124,11 +124,11 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw, * Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of * the SURFACE_STATE "Surface Vertical Alignment" field. */ - if (_mesa_is_format_compressed(format)) + if (_mesa_is_format_compressed(mt->format)) /* See comment above for the horizontal alignment */ return brw->gen >= 9 ? 16 : 4; - if (format == MESA_FORMAT_S_UINT8) + if (mt->format == MESA_FORMAT_S_UINT8) return brw->gen >= 7 ? 8 : 4; /* Broadwell only supports VALIGN of 4, 8, and 16. The BSpec says 4 @@ -137,10 +137,10 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw, if (brw->gen >= 8) return 4; - if (multisampled) + if (mt->num_samples > 1) return 4; - GLenum base_format = _mesa_get_format_base_format(format); + GLenum base_format = _mesa_get_format_base_format(mt->format); if (brw->gen >= 6 && (base_format == GL_DEPTH_COMPONENT || @@ -161,7 +161,7 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw, * * VALIGN_4 is not supported for surface format R32G32B32_FLOAT. 
*/ - if (base_format == GL_YCBCR_MESA || format == MESA_FORMAT_RGB_FLOAT32) + if (base_format == GL_YCBCR_MESA || mt->format == MESA_FORMAT_RGB_FLOAT32) return 2; return 4; @@ -566,7 +566,6 @@ brw_miptree_layout(struct brw_context *brw, enum intel_miptree_tiling_mode requested, struct intel_mipmap_tree *mt) { - bool multisampled = mt->num_samples > 1; bool gen6_hiz_or_stencil = false; if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) { @@ -599,8 +598,7 @@ brw_miptree_layout(struct brw_context *brw, } } else { mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt); - mt->align_h = - intel_vertical_texture_alignment_unit(brw, mt->format, multisampled); + mt->align_h = intel_vertical_texture_alignment_unit(brw, mt); } switch (mt->target) { From 126078faca7a9da0f825d3ad07ce9b1183737240 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:47 -0700 Subject: [PATCH 472/834] i965/gen9: Set tiled resource mode for the miptree Signed-off-by: Anuj Phogat Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 2 ++ src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index ec7c6c4daa0..e461bfc4e05 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -568,6 +568,8 @@ brw_miptree_layout(struct brw_context *brw, { bool gen6_hiz_or_stencil = false; + mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE; + if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) { const GLenum base_format = _mesa_get_format_base_format(mt->format); gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 0db6b44b9fd..20bed5378ca 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -330,6 +330,13 @@ struct intel_miptree_aux_buffer struct intel_mipmap_tree *mt; /**< hiz miptree used with Gen6 */ }; +/* Tile resource modes */ +enum intel_miptree_tr_mode { + INTEL_MIPTREE_TRMODE_NONE, + INTEL_MIPTREE_TRMODE_YF, + INTEL_MIPTREE_TRMODE_YS +}; + struct intel_mipmap_tree { /** Buffer object containing the pixel data. */ @@ -338,6 +345,7 @@ struct intel_mipmap_tree uint32_t pitch; /**< pitch in bytes. */ uint32_t tiling; /**< One of the I915_TILING_* flags */ + enum intel_miptree_tr_mode tr_mode; /* Effectively the key: */ From 447410b66436acde4440aeae45f701b0e4502e97 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:48 -0700 Subject: [PATCH 473/834] i965/gen9: Set horizontal alignment for the miptree v3: Use ffs() and a switch loop in tr_mode_vertical_texture_alignment() (Ben) Signed-off-by: Anuj Phogat Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 81 ++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index e461bfc4e05..4c66bb53fa1 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -39,6 +39,81 @@ #define FILE_DEBUG_FLAG DEBUG_MIPTREE +static unsigned int +tr_mode_horizontal_texture_alignment(const struct brw_context *brw, + const struct intel_mipmap_tree *mt) +{ + const unsigned *align_yf, *align_ys; + const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8; + unsigned ret_align, divisor; + + /* Horizontal alignment tables for TRMODE_{YF,YS}. Value in below + * tables specifies the horizontal alignment requirement in elements + * for the surface. An element is defined as a pixel in uncompressed + * surface formats, and as a compression block in compressed surface + * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an + * element is a sample. 
+ */ + const unsigned align_1d_yf[] = {4096, 2048, 1024, 512, 256}; + const unsigned align_1d_ys[] = {65536, 32768, 16384, 8192, 4096}; + const unsigned align_2d_yf[] = {64, 64, 32, 32, 16}; + const unsigned align_2d_ys[] = {256, 256, 128, 128, 64}; + const unsigned align_3d_yf[] = {16, 8, 8, 8, 4}; + const unsigned align_3d_ys[] = {64, 32, 32, 32, 16}; + int i = 0; + + /* Alignment computations below assume bpp >= 8 and a power of 2. */ + assert (bpp >= 8 && bpp <= 128 && is_power_of_two(bpp)); + + switch(mt->target) { + case GL_TEXTURE_1D: + case GL_TEXTURE_1D_ARRAY: + align_yf = align_1d_yf; + align_ys = align_1d_ys; + break; + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + align_yf = align_2d_yf; + align_ys = align_2d_ys; + break; + case GL_TEXTURE_3D: + align_yf = align_3d_yf; + align_ys = align_3d_ys; + break; + default: + unreachable("not reached"); + } + + /* Compute array index. */ + i = ffs(bpp/8) - 1; + + ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ? + align_yf[i] : align_ys[i]; + + assert(is_power_of_two(mt->num_samples)); + + switch (mt->num_samples) { + case 2: + case 4: + divisor = 2; + break; + case 8: + case 16: + divisor = 4; + break; + default: + divisor = 1; + break; + } + return ret_align / divisor; +} + + static unsigned int intel_horizontal_texture_alignment_unit(struct brw_context *brw, struct intel_mipmap_tree *mt) @@ -88,6 +163,12 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw, if (mt->format == MESA_FORMAT_S_UINT8) return 8; + if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) { + uint32_t align = tr_mode_horizontal_texture_alignment(brw, mt); + /* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32. */ + return align < 32 ? 
32 : align; + } + if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16) return 8; From 9111377978edf1c688811f877896942be9f8a332 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:48 -0700 Subject: [PATCH 474/834] i965/gen9: Set vertical alignment for the miptree v3: Use ffs() and a switch loop in tr_mode_horizontal_texture_alignment() (Ben) Signed-off-by: Anuj Phogat Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 70 ++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 4c66bb53fa1..9a2a331fcb7 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -178,6 +178,70 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw, return 4; } +static unsigned int +tr_mode_vertical_texture_alignment(const struct brw_context *brw, + const struct intel_mipmap_tree *mt) +{ + const unsigned *align_yf, *align_ys; + const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8; + unsigned ret_align, divisor; + + /* Vertical alignment tables for TRMODE_YF and TRMODE_YS. */ + const unsigned align_2d_yf[] = {64, 32, 32, 16, 16}; + const unsigned align_2d_ys[] = {256, 128, 128, 64, 64}; + const unsigned align_3d_yf[] = {16, 16, 16, 8, 8}; + const unsigned align_3d_ys[] = {32, 32, 32, 16, 16}; + int i = 0; + + assert(brw->gen >= 9 && + mt->target != GL_TEXTURE_1D && + mt->target != GL_TEXTURE_1D_ARRAY); + + /* Alignment computations below assume bpp >= 8 and a power of 2. 
*/ + assert (bpp >= 8 && bpp <= 128 && is_power_of_two(bpp)) ; + + switch(mt->target) { + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + align_yf = align_2d_yf; + align_ys = align_2d_ys; + break; + case GL_TEXTURE_3D: + align_yf = align_3d_yf; + align_ys = align_3d_ys; + break; + default: + unreachable("not reached"); + } + + /* Compute array index. */ + i = ffs(bpp / 8) - 1; + + ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ? + align_yf[i] : align_ys[i]; + + assert(is_power_of_two(mt->num_samples)); + + switch (mt->num_samples) { + case 4: + case 8: + divisor = 2; + break; + case 16: + divisor = 4; + break; + default: + divisor = 1; + break; + } + return ret_align / divisor; +} + static unsigned int intel_vertical_texture_alignment_unit(struct brw_context *brw, const struct intel_mipmap_tree *mt) @@ -212,6 +276,12 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw, if (mt->format == MESA_FORMAT_S_UINT8) return brw->gen >= 7 ? 8 : 4; + if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) { + uint32_t align = tr_mode_vertical_texture_alignment(brw, mt); + /* XY_FAST_COPY_BLT doesn't support vertical alignment < 64 */ + return align < 64 ? 64 : align; + } + /* Broadwell only supports VALIGN of 4, 8, and 16. The BSpec says 4 * should always be used, except for stencil buffers, which should be 8. */ From 556b2fbd240bff5d20c5137827757e053c00c3a8 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:47 -0700 Subject: [PATCH 475/834] i965: Make a helper function intel_miptree_set_total_width_height() and some more code refactoring. No functional changes in this patch. 
Signed-off-by: Anuj Phogat Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 95 ++++++++++++---------- 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 9a2a331fcb7..312a8873afe 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -710,6 +710,56 @@ brw_miptree_choose_tiling(struct brw_context *brw, return I915_TILING_Y | I915_TILING_X; } +static void +intel_miptree_set_total_width_height(struct brw_context *brw, + struct intel_mipmap_tree *mt) +{ + switch (mt->target) { + case GL_TEXTURE_CUBE_MAP: + if (brw->gen == 4) { + /* Gen4 stores cube maps as 3D textures. */ + assert(mt->physical_depth0 == 6); + brw_miptree_layout_texture_3d(brw, mt); + } else { + /* All other hardware stores cube maps as 2D arrays. */ + brw_miptree_layout_texture_array(brw, mt); + } + break; + + case GL_TEXTURE_3D: + if (brw->gen >= 9) + brw_miptree_layout_texture_array(brw, mt); + else + brw_miptree_layout_texture_3d(brw, mt); + break; + + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + case GL_TEXTURE_CUBE_MAP_ARRAY: + brw_miptree_layout_texture_array(brw, mt); + break; + + default: + switch (mt->msaa_layout) { + case INTEL_MSAA_LAYOUT_UMS: + case INTEL_MSAA_LAYOUT_CMS: + brw_miptree_layout_texture_array(brw, mt); + break; + case INTEL_MSAA_LAYOUT_NONE: + case INTEL_MSAA_LAYOUT_IMS: + if (use_linear_1d_layout(brw, mt)) + gen9_miptree_layout_1d(mt); + else + brw_miptree_layout_2d(mt); + break; + } + break; + } + + DBG("%s: %dx%dx%d\n", __func__, + mt->total_width, mt->total_height, mt->cpp); +} void brw_miptree_layout(struct brw_context *brw, @@ -754,50 +804,7 @@ brw_miptree_layout(struct brw_context *brw, mt->align_h = intel_vertical_texture_alignment_unit(brw, mt); } - switch (mt->target) { - case GL_TEXTURE_CUBE_MAP: - if (brw->gen == 4) { - /* Gen4 
stores cube maps as 3D textures. */ - assert(mt->physical_depth0 == 6); - brw_miptree_layout_texture_3d(brw, mt); - } else { - /* All other hardware stores cube maps as 2D arrays. */ - brw_miptree_layout_texture_array(brw, mt); - } - break; - - case GL_TEXTURE_3D: - if (brw->gen >= 9) - brw_miptree_layout_texture_array(brw, mt); - else - brw_miptree_layout_texture_3d(brw, mt); - break; - - case GL_TEXTURE_1D_ARRAY: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - case GL_TEXTURE_CUBE_MAP_ARRAY: - brw_miptree_layout_texture_array(brw, mt); - break; - - default: - switch (mt->msaa_layout) { - case INTEL_MSAA_LAYOUT_UMS: - case INTEL_MSAA_LAYOUT_CMS: - brw_miptree_layout_texture_array(brw, mt); - break; - case INTEL_MSAA_LAYOUT_NONE: - case INTEL_MSAA_LAYOUT_IMS: - if (use_linear_1d_layout(brw, mt)) - gen9_miptree_layout_1d(mt); - else - brw_miptree_layout_2d(mt); - break; - } - break; - } - DBG("%s: %dx%dx%d\n", __func__, - mt->total_width, mt->total_height, mt->cpp); + intel_miptree_set_total_width_height(brw, mt); if (!mt->total_width || !mt->total_height) { intel_miptree_release(&mt); From f7aad9da20b13c98f77d6a690b327716f39c0a47 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 5 Apr 2015 16:48:47 +1000 Subject: [PATCH 476/834] mesa/teximage: use correct extension for accept stencil texture. This was using the wrong extension, ARB_stencil_texturing doesn't mention any changes in this area. Fixes "dEQP-GLES3.functional.fbo.completeness.renderable.texture. stencil.stencil_index8." 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90751 Signed-off-by: Dave Airlie Reviewed-by: Ilia Mirkin Reviewed-by: Kenneth Graunke --- src/mesa/main/teximage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 7bc1da7f805..3d85615fa45 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -222,7 +222,7 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat ) } } - if (ctx->Extensions.ARB_stencil_texturing) { + if (ctx->Extensions.ARB_texture_stencil8) { switch (internalFormat) { case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1: From 08a1046f6777c589f90eae3fd1e7e41ca364c45c Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 8 Jun 2015 14:46:58 +0300 Subject: [PATCH 477/834] mesa: Include simple_list.h explicitly in errors.c. This seems to be the only user of simple_list in core mesa not including the header explicitly. Reviewed-by: Brian Paul --- src/mesa/main/errors.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c index 2aa1deb635f..16f10ddb694 100644 --- a/src/mesa/main/errors.c +++ b/src/mesa/main/errors.c @@ -39,6 +39,7 @@ #include "mtypes.h" #include "version.h" #include "util/hash_table.h" +#include "util/simple_list.h" static mtx_t DynamicIDMutex = _MTX_INITIALIZER_NP; static GLuint NextDynamicID = 1; From 7065c8153b5bea3fe4f364dbb922488f755bc1db Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 8 Jun 2015 14:47:17 +0300 Subject: [PATCH 478/834] tnl: Include simple_list.h explicitly in t_context.c. 
Reviewed-by: Brian Paul --- src/mesa/tnl/t_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index 5b9dd54d75a..bc77ba8bf95 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -36,6 +36,7 @@ #include "math/m_xform.h" #include "main/state.h" #include "main/viewport.h" +#include "util/simple_list.h" #include "tnl.h" #include "t_context.h" From 277b94f172c44cb4199a740722f42bc701d591dc Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 8 Jun 2015 14:48:29 +0300 Subject: [PATCH 479/834] dri/nouveau: Include simple_list.h explicitly in nv*_state_tnl.c. Reviewed-by: Brian Paul --- src/mesa/drivers/dri/nouveau/nv10_state_tnl.c | 2 ++ src/mesa/drivers/dri/nouveau/nv20_state_tnl.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c index c0c7b26bbf7..1398385b262 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c +++ b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c @@ -31,6 +31,8 @@ #include "nv10_3d.xml.h" #include "nv10_driver.h" +#include "util/simple_list.h" + void nv10_emit_clip_plane(struct gl_context *ctx, int emit) { diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c index f0acbed8560..41395516ea4 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c +++ b/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c @@ -32,6 +32,8 @@ #include "nv10_driver.h" #include "nv20_driver.h" +#include "util/simple_list.h" + #define LIGHT_MODEL_AMBIENT_R(side) \ ((side) ? NV20_3D_LIGHT_MODEL_BACK_AMBIENT_R : \ NV20_3D_LIGHT_MODEL_FRONT_AMBIENT_R) From f9367191b30956b9cfe578dd8e426b28d2417b6b Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 8 Jun 2015 14:49:31 +0300 Subject: [PATCH 480/834] mesa: Drop include of simple_list.h from mtypes.h. 
simple_list.h defines a number of macros with short non-namespaced names that can easily collide with other declarations (first_elem, last_elem, next_elem, prev_elem, at_end), and according to the comment it was only being included because of struct simple_node, which is no longer used in this file. Reviewed-by: Brian Paul --- src/mesa/main/mtypes.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 1598e2cdbd0..e67e8074a88 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -43,7 +43,6 @@ #include "glapi/glapi.h" #include "math/m_matrix.h" /* GLmatrix */ #include "glsl/shader_enums.h" -#include "util/simple_list.h" /* struct simple_node */ #include "main/formats.h" /* MESA_FORMAT_COUNT */ From 239dfc5410d98f3b31a06652ceff13d9858c1f9b Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:23:46 +0300 Subject: [PATCH 481/834] i965: Define consistent interface to predicate an instruction. v2: Use set_ prefix. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_ir_fs.h | 22 ++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_ir_vec4.h | 22 ++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index a79713ce201..660bab2c466 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -271,4 +271,26 @@ set_sechalf(fs_inst *inst) return inst; } +/** + * Make the execution of \p inst dependent on the evaluation of a possibly + * inverted predicate. + */ +static inline fs_inst * +set_predicate_inv(enum brw_predicate pred, bool inverse, + fs_inst *inst) +{ + inst->predicate = pred; + inst->predicate_inverse = inverse; + return inst; +} + +/** + * Make the execution of \p inst dependent on the evaluation of a predicate. 
+ */ +static inline fs_inst * +set_predicate(enum brw_predicate pred, fs_inst *inst) +{ + return set_predicate_inv(pred, false, inst); +} + #endif diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index a56fdd6fce9..b9a5a251914 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -190,6 +190,28 @@ public: } }; +/** + * Make the execution of \p inst dependent on the evaluation of a possibly + * inverted predicate. + */ +inline vec4_instruction * +set_predicate_inv(enum brw_predicate pred, bool inverse, + vec4_instruction *inst) +{ + inst->predicate = pred; + inst->predicate_inverse = inverse; + return inst; +} + +/** + * Make the execution of \p inst dependent on the evaluation of a predicate. + */ +inline vec4_instruction * +set_predicate(enum brw_predicate pred, vec4_instruction *inst) +{ + return set_predicate_inv(pred, false, inst); +} + } /* namespace brw */ #endif From 7624f8410f64a7ce0ba125a2025904c70610c076 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:24:18 +0300 Subject: [PATCH 482/834] i965: Define consistent interface to enable instruction conditional modifiers. v2: Use set_ prefix. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_ir_fs.h | 11 +++++++++++ src/mesa/drivers/dri/i965/brw_ir_vec4.h | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 660bab2c466..07af0082d9d 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -293,4 +293,15 @@ set_predicate(enum brw_predicate pred, fs_inst *inst) return set_predicate_inv(pred, false, inst); } +/** + * Write the result of evaluating the condition given by \p mod to a flag + * register. 
+ */ +static inline fs_inst * +set_condmod(enum brw_conditional_mod mod, fs_inst *inst) +{ + inst->conditional_mod = mod; + return inst; +} + #endif diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index b9a5a251914..96d1cfbeb93 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -212,6 +212,17 @@ set_predicate(enum brw_predicate pred, vec4_instruction *inst) return set_predicate_inv(pred, false, inst); } +/** + * Write the result of evaluating the condition given by \p mod to a flag + * register. + */ +inline vec4_instruction * +set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst) +{ + inst->conditional_mod = mod; + return inst; +} + } /* namespace brw */ #endif From 6e040657292d8d0a6fe8fe7d4d94e9808f29e924 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:24:50 +0300 Subject: [PATCH 483/834] i965: Define consistent interface to enable instruction result saturation. v2: Use set_ prefix. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_ir_fs.h | 11 +++++++++++ src/mesa/drivers/dri/i965/brw_ir_vec4.h | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 07af0082d9d..eee4d7e1e00 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -304,4 +304,15 @@ set_condmod(enum brw_conditional_mod mod, fs_inst *inst) return inst; } +/** + * Clamp the result of \p inst to the saturation range of its destination + * datatype. 
+ */ +static inline fs_inst * +set_saturate(bool saturate, fs_inst *inst) +{ + inst->saturate = saturate; + return inst; +} + #endif diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 96d1cfbeb93..fceacae0e51 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -223,6 +223,17 @@ set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst) return inst; } +/** + * Clamp the result of \p inst to the saturation range of its destination + * datatype. + */ +inline vec4_instruction * +set_saturate(bool saturate, vec4_instruction *inst) +{ + inst->saturate = saturate; + return inst; +} + } /* namespace brw */ #endif From 8ea8f83c8f6b932749ada32ac666d151a9636508 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 22 Apr 2015 14:02:47 +0300 Subject: [PATCH 484/834] i965/fs: Introduce FS IR builder. The purpose of this change is threefold: First, it improves the modularity of the compiler back-end by separating the functionality required to construct an i965 IR program from the rest of the visitor god-object, which in turn will reduce the coupling between other components and the visitor allowing a more modular design. This patch doesn't yet remove the equivalent functionality from the visitor classes, as it involves major back-end surgery. Second, it improves consistency between the scalar and vector back-ends. The FS and VEC4 builders can both be used to generate scalar code with a compatible interface or they can be used to generate natural vector width code -- 1 or 4 components respectively. Third, the approach to IR construction is somewhat different to what the visitor classes currently do. All parameters affecting code generation (execution size, half control, point in the program where new instructions are inserted, etc.)
are encapsulated in a stand-alone object rather than being quasi-global state (yes, anything defined in one of the visitor classes is effectively global due to the tight coupling with virtually everything else in the compiler back-end). This object is lightweight and can be copied, mutated and passed around, making helper IR-building functions more flexible because they can now simply take a builder object as argument and will inherit its IR generation properties in exactly the same way that a discrete instruction would from the same builder object. The emit_typed_write() function from my image-load-store branch is an example that illustrates the usefulness of the latter point: Due to hardware limitations the function may have to split the untyped surface message in 8-wide chunks. That means that the several functions called to help with the construction of the message payload are themselves required to set the execution width and half control correctly on the instructions they emit, and to allocate all registers with half the default width. With the previous approach this would require the used helper functions to be aware of the parameters that might differ from the default state and explicitly set the instruction bits accordingly. With the new approach they would get a modified builder object as argument that would influence all instructions emitted by the helper function as if it were the default state. Another example is the fs_visitor::VARYING_PULL_CONSTANT_LOAD() method. It doesn't actually emit any instructions, they are simply created and inserted into an exec_list which is returned for the caller to emit at some location of the program. This sort of two-step emission becomes unnecessary with the builder interface because the insertion point is one more of the code generation parameters which are part of the builder object. 
The caller can simply pass VARYING_PULL_CONSTANT_LOAD() a modified builder object pointing at the location of the program where the effect of the constant load is desired. This two-step emission (which pervades the compiler back-end and is in most cases redundant) goes away: E.g. ADD() now actually adds two registers rather than just creating an ADD instruction in memory, emit(ADD()) is no longer necessary. v2: Drop scalarizing VEC4 builder. v3: Take a backend_shader as constructor argument. Improve handling of debug annotations and execution control flags. v4: Drop Gen6 IF with inline comparison. Rename "instr" variable. Initialize cursor to NULL by default and add method to explicitly point the builder at the end of the program. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_fs_builder.h | 652 +++++++++++++++++++++ 2 files changed, 653 insertions(+) create mode 100644 src/mesa/drivers/dri/i965/brw_fs_builder.h diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 3f852cd21ec..93f336e4b72 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -42,6 +42,7 @@ i965_FILES = \ brw_ff_gs.c \ brw_ff_gs_emit.c \ brw_ff_gs.h \ + brw_fs_builder.h \ brw_fs_channel_expressions.cpp \ brw_fs_cmod_propagation.cpp \ brw_fs_combine_constants.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h new file mode 100644 index 00000000000..58ac5980da5 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -0,0 +1,652 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, 
merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_FS_BUILDER_H +#define BRW_FS_BUILDER_H + +#include "brw_ir_fs.h" +#include "brw_shader.h" +#include "brw_context.h" + +namespace brw { + /** + * Toolbox to assemble an FS IR program out of individual instructions. + * + * This object is meant to have an interface consistent with + * brw::vec4_builder. They cannot be fully interchangeable because + * brw::fs_builder generates scalar code while brw::vec4_builder generates + * vector code. + */ + class fs_builder { + public: + /** Type used in this IR to represent a source of an instruction. */ + typedef fs_reg src_reg; + + /** Type used in this IR to represent the destination of an instruction. */ + typedef fs_reg dst_reg; + + /** Type used in this IR to represent an instruction. */ + typedef fs_inst instruction; + + /** + * Construct an fs_builder that inserts instructions into \p shader. + * \p dispatch_width gives the native execution width of the program. 
+ */ + fs_builder(backend_shader *shader, + unsigned dispatch_width) : + shader(shader), block(NULL), cursor(NULL), + _dispatch_width(dispatch_width), + _group(0), + force_writemask_all(false), + annotation() + { + } + + /** + * Construct an fs_builder that inserts instructions before \p cursor in + * basic block \p block, inheriting other code generation parameters + * from this. + */ + fs_builder + at(bblock_t *block, exec_node *cursor) const + { + fs_builder bld = *this; + bld.block = block; + bld.cursor = cursor; + return bld; + } + + /** + * Construct an fs_builder appending instructions at the end of the + * instruction list of the shader, inheriting other code generation + * parameters from this. + */ + fs_builder + at_end() const + { + return at(NULL, (exec_node *)&shader->instructions.tail); + } + + /** + * Construct a builder specifying the default SIMD width and group of + * channel enable signals, inheriting other code generation parameters + * from this. + * + * \p n gives the default SIMD width, \p i gives the slot group used for + * predication and control flow masking in multiples of \p n channels. + */ + fs_builder + group(unsigned n, unsigned i) const + { + assert(n <= dispatch_width() && + i < dispatch_width() / n); + fs_builder bld = *this; + bld._dispatch_width = n; + bld._group += i * n; + return bld; + } + + /** + * Alias for group() with width equal to eight. + */ + fs_builder + half(unsigned i) const + { + return group(8, i); + } + + /** + * Construct a builder with per-channel control flow execution masking + * disabled if \p b is true. If control flow execution masking is + * already disabled this has no effect. + */ + fs_builder + exec_all(bool b = true) const + { + fs_builder bld = *this; + if (b) + bld.force_writemask_all = true; + return bld; + } + + /** + * Construct a builder with the given debug annotation info. 
+ */ + fs_builder + annotate(const char *str, const void *ir = NULL) const + { + fs_builder bld = *this; + bld.annotation.str = str; + bld.annotation.ir = ir; + return bld; + } + + /** + * Get the SIMD width in use. + */ + unsigned + dispatch_width() const + { + return _dispatch_width; + } + + /** + * Allocate a virtual register of natural vector size (one for this IR) + * and SIMD width. \p n gives the amount of space to allocate in + * dispatch_width units (which is just enough space for one logical + * component in this IR). + */ + dst_reg + vgrf(enum brw_reg_type type, unsigned n = 1) const + { + return dst_reg(GRF, shader->alloc.allocate( + DIV_ROUND_UP(n * type_sz(type) * dispatch_width(), + REG_SIZE)), + type, dispatch_width()); + } + + /** + * Create a null register of floating type. + */ + dst_reg + null_reg_f() const + { + return dst_reg(retype(brw_null_vec(dispatch_width()), + BRW_REGISTER_TYPE_F)); + } + + /** + * Create a null register of signed integer type. + */ + dst_reg + null_reg_d() const + { + return dst_reg(retype(brw_null_vec(dispatch_width()), + BRW_REGISTER_TYPE_D)); + } + + /** + * Create a null register of unsigned integer type. + */ + dst_reg + null_reg_ud() const + { + return dst_reg(retype(brw_null_vec(dispatch_width()), + BRW_REGISTER_TYPE_UD)); + } + + /** + * Get the mask of SIMD channels enabled by dispatch and not yet + * disabled by discard. + */ + src_reg + sample_mask_reg() const + { + const bool uses_kill = + (shader->stage == MESA_SHADER_FRAGMENT && + ((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill); + return (shader->stage != MESA_SHADER_FRAGMENT ? src_reg(0xffff) : + uses_kill ? brw_flag_reg(0, 1) : + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)); + } + + /** + * Insert an instruction into the program. + */ + instruction * + emit(const instruction &inst) const + { + return emit(new(shader->mem_ctx) instruction(inst)); + } + + /** + * Create and insert a nullary control instruction into the program. 
+ */ + instruction * + emit(enum opcode opcode) const + { + return emit(instruction(opcode, dispatch_width())); + } + + /** + * Create and insert a nullary instruction into the program. + */ + instruction * + emit(enum opcode opcode, const dst_reg &dst) const + { + return emit(instruction(opcode, dst)); + } + + /** + * Create and insert a unary instruction into the program. + */ + instruction * + emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const + { + switch (opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + return fix_math_instruction( + emit(instruction(opcode, dst.width, dst, + fix_math_operand(src0)))); + + default: + return emit(instruction(opcode, dst.width, dst, src0)); + } + } + + /** + * Create and insert a binary instruction into the program. + */ + instruction * + emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0, + const src_reg &src1) const + { + switch (opcode) { + case SHADER_OPCODE_POW: + case SHADER_OPCODE_INT_QUOTIENT: + case SHADER_OPCODE_INT_REMAINDER: + return fix_math_instruction( + emit(instruction(opcode, dst.width, dst, + fix_math_operand(src0), + fix_math_operand(src1)))); + + default: + return emit(instruction(opcode, dst.width, dst, src0, src1)); + + } + } + + /** + * Create and insert a ternary instruction into the program. + */ + instruction * + emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0, + const src_reg &src1, const src_reg &src2) const + { + switch (opcode) { + case BRW_OPCODE_BFE: + case BRW_OPCODE_BFI2: + case BRW_OPCODE_MAD: + case BRW_OPCODE_LRP: + return emit(instruction(opcode, dst.width, dst, + fix_3src_operand(src0), + fix_3src_operand(src1), + fix_3src_operand(src2))); + + default: + return emit(instruction(opcode, dst.width, dst, src0, src1, src2)); + } + } + + /** + * Insert a preallocated instruction into the program. 
+ */ + instruction * + emit(instruction *inst) const + { + assert(inst->exec_size == dispatch_width() || + force_writemask_all); + assert(_group == 0 || _group == 8); + + inst->force_sechalf = (_group == 8); + inst->force_writemask_all = force_writemask_all; + inst->annotation = annotation.str; + inst->ir = annotation.ir; + + if (block) + static_cast<instruction *>(cursor)->insert_before(block, inst); + else + cursor->insert_before(inst); + + return inst; + } + + /** + * Select \p src0 if the comparison of both sources with the given + * conditional mod evaluates to true, otherwise select \p src1. + * + * Generally useful to get the minimum or maximum of two values. + */ + void + emit_minmax(const dst_reg &dst, const src_reg &src0, + const src_reg &src1, brw_conditional_mod mod) const + { + if (shader->devinfo->gen >= 6) { + set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); + } else { + CMP(null_reg_d(), src0, src1, mod); + set_predicate(BRW_PREDICATE_NORMAL, + SEL(dst, src0, src1)); + } + } + + /** + * Copy any live channel from \p src to the first channel of \p dst. + */ + void + emit_uniformize(const dst_reg &dst, const src_reg &src) const + { + const fs_builder ubld = exec_all(); + const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD); + + ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0)); + ubld.emit(SHADER_OPCODE_BROADCAST, component(dst, 0), + src, component(chan_index, 0)); + } + + /** + * Assorted arithmetic ops.
+ * @{ + */ +#define ALU1(op) \ + instruction * \ + op(const dst_reg &dst, const src_reg &src0) const \ + { \ + return emit(BRW_OPCODE_##op, dst, src0); \ + } + +#define ALU2(op) \ + instruction * \ + op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \ + { \ + return emit(BRW_OPCODE_##op, dst, src0, src1); \ + } + +#define ALU2_ACC(op) \ + instruction * \ + op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \ + { \ + instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \ + inst->writes_accumulator = true; \ + return inst; \ + } + +#define ALU3(op) \ + instruction * \ + op(const dst_reg &dst, const src_reg &src0, const src_reg &src1, \ + const src_reg &src2) const \ + { \ + return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \ + } + + ALU2(ADD) + ALU2_ACC(ADDC) + ALU2(AND) + ALU2(ASR) + ALU2(AVG) + ALU3(BFE) + ALU2(BFI1) + ALU3(BFI2) + ALU1(BFREV) + ALU1(CBIT) + ALU2(CMPN) + ALU3(CSEL) + ALU2(DP2) + ALU2(DP3) + ALU2(DP4) + ALU2(DPH) + ALU1(F16TO32) + ALU1(F32TO16) + ALU1(FBH) + ALU1(FBL) + ALU1(FRC) + ALU2(LINE) + ALU1(LZD) + ALU2(MAC) + ALU2_ACC(MACH) + ALU3(MAD) + ALU1(MOV) + ALU2(MUL) + ALU1(NOT) + ALU2(OR) + ALU2(PLN) + ALU1(RNDD) + ALU1(RNDE) + ALU1(RNDU) + ALU1(RNDZ) + ALU2(SAD2) + ALU2_ACC(SADA2) + ALU2(SEL) + ALU2(SHL) + ALU2(SHR) + ALU2_ACC(SUBB) + ALU2(XOR) + +#undef ALU3 +#undef ALU2_ACC +#undef ALU2 +#undef ALU1 + /** @} */ + + /** + * CMP: Sets the low bit of the destination channels with the result + * of the comparison, while the upper bits are undefined, and updates + * the flag register with the packed 16 bits of the result. + */ + instruction * + CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1, + brw_conditional_mod condition) const + { + /* Take the instruction: + * + * CMP null src0 src1 + * + * Original gen4 does type conversion to the destination type + * before comparison, producing garbage results for floating + * point comparisons. 
+ * + * The destination type doesn't matter on newer generations, + * so we set the type to match src0 so we can compact the + * instruction. + */ + return set_condmod(condition, + emit(BRW_OPCODE_CMP, retype(dst, src0.type), + fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); + } + + /** + * Gen4 predicated IF. + */ + instruction * + IF(brw_predicate predicate) const + { + return set_predicate(predicate, emit(BRW_OPCODE_IF)); + } + + /** + * Emit a linear interpolation instruction. + */ + instruction * + LRP(const dst_reg &dst, const src_reg &x, const src_reg &y, + const src_reg &a) const + { + if (shader->devinfo->gen >= 6) { + /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so + * we need to reorder the operands. + */ + return emit(BRW_OPCODE_LRP, dst, a, y, x); + + } else { + /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */ + const dst_reg y_times_a = vgrf(dst.type); + const dst_reg one_minus_a = vgrf(dst.type); + const dst_reg x_times_one_minus_a = vgrf(dst.type); + + MUL(y_times_a, y, a); + ADD(one_minus_a, negate(a), src_reg(1.0f)); + MUL(x_times_one_minus_a, x, src_reg(one_minus_a)); + return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)); + } + } + + /** + * Collect a number of registers in a contiguous range of registers. 
+ */ + instruction * + LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src, + unsigned sources, unsigned header_size) const + { + assert(dst.width % 8 == 0); + instruction *inst = emit(instruction(SHADER_OPCODE_LOAD_PAYLOAD, + dst.width, dst, src, sources)); + inst->header_size = header_size; + + for (unsigned i = 0; i < header_size; i++) + assert(src[i].file != GRF || + src[i].width * type_sz(src[i].type) == 32); + inst->regs_written = header_size; + + for (unsigned i = header_size; i < sources; ++i) + assert(src[i].file != GRF || + src[i].width == dst.width); + inst->regs_written += (sources - header_size) * (dst.width / 8); + + return inst; + } + + backend_shader *shader; + + private: + /** + * Workaround for negation of UD registers. See comment in + * fs_generator::generate_code() for more details. + */ + src_reg + fix_unsigned_negate(const src_reg &src) const + { + if (src.type == BRW_REGISTER_TYPE_UD && + src.negate) { + dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD); + MOV(temp, src); + return src_reg(temp); + } else { + return src; + } + } + + /** + * Workaround for source register modes not supported by the ternary + * instruction encoding. + */ + src_reg + fix_3src_operand(const src_reg &src) const + { + if (src.file == GRF || src.file == UNIFORM || src.stride > 1) { + return src; + } else { + dst_reg expanded = vgrf(src.type); + MOV(expanded, src); + return expanded; + } + } + + /** + * Workaround for source register modes not supported by the math + * instruction. + */ + src_reg + fix_math_operand(const src_reg &src) const + { + /* Can't do hstride == 0 args on gen6 math, so expand it out. We + * might be able to do better by doing execsize = 1 math and then + * expanding that result out, but we would need to be careful with + * masking. + * + * Gen6 hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. 
+ * + * Gen7 relaxes most of the above restrictions, but still can't use IMM + * operands to math + */ + if ((shader->devinfo->gen == 6 && + (src.file == IMM || src.file == UNIFORM || + src.abs || src.negate)) || + (shader->devinfo->gen == 7 && src.file == IMM)) { + const dst_reg tmp = vgrf(src.type); + MOV(tmp, src); + return tmp; + } else { + return src; + } + } + + /** + * Workaround other weirdness of the math instruction. + */ + instruction * + fix_math_instruction(instruction *inst) const + { + if (shader->devinfo->gen < 6) { + inst->base_mrf = 2; + inst->mlen = inst->sources * dispatch_width() / 8; + + if (inst->sources > 1) { + /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 + * "Message Payload": + * + * "Operand0[7]. For the INT DIV functions, this operand is the + * denominator." + * ... + * "Operand1[7]. For the INT DIV functions, this operand is the + * numerator." + */ + const bool is_int_div = inst->opcode != SHADER_OPCODE_POW; + const fs_reg src0 = is_int_div ? inst->src[1] : inst->src[0]; + const fs_reg src1 = is_int_div ? inst->src[0] : inst->src[1]; + + inst->resize_sources(1); + inst->src[0] = src0; + + at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type, + dispatch_width()), src1); + } + } + + return inst; + } + + bblock_t *block; + exec_node *cursor; + + unsigned _dispatch_width; + unsigned _group; + bool force_writemask_all; + + /** Debug annotation info. */ + struct { + const char *str; + const void *ir; + } annotation; + }; +} + +#endif From e04b4156a745fc09afa066c892c1913362eae9df Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 19:59:44 +0300 Subject: [PATCH 485/834] i965/fs: Allocate a common IR builder object in fs_visitor. v2: Call fs_builder::at_end() to point the builder at the end of the program explicitly. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 2 ++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 902f239e3df..9fca9914f2c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3989,6 +3989,17 @@ fs_visitor::calculate_register_pressure() void fs_visitor::optimize() { + /* bld is the common builder object pointing at the end of the program we + * used to translate it into i965 IR. For the optimization and lowering + * passes coming next, any code added after the end of the program without + * having explicitly called fs_builder::at() clearly points at a mistake. + * Ideally optimization passes wouldn't be part of the visitor so they + * wouldn't have access to bld at all, but they do, so just in case some + * pass forgets to ask for a location explicitly set it to NULL here to + * make it trip. 
+ */ + bld = bld.at(NULL, NULL); + split_virtual_grfs(); move_uniform_array_access_to_pull_constants(); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 3bdf0a25f37..bdda9d0558b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -29,6 +29,7 @@ #include "brw_shader.h" #include "brw_ir_fs.h" +#include "brw_fs_builder.h" extern "C" { @@ -453,6 +454,7 @@ public: const unsigned dispatch_width; /**< 8 or 16 */ unsigned promoted_constants; + brw::fs_builder bld; }; /** diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 8b9fedea0ab..69c04524fea 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -47,6 +47,7 @@ #include "glsl/ir_optimization.h" #include "program/sampler.h" +using namespace brw; fs_reg * fs_visitor::emit_vs_system_value(int location) @@ -2045,7 +2046,8 @@ fs_visitor::fs_visitor(struct brw_context *brw, reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), key(key), prog_data(prog_data), - dispatch_width(dispatch_width), promoted_constants(0) + dispatch_width(dispatch_width), promoted_constants(0), + bld(fs_builder(this, dispatch_width).at_end()) { this->mem_ctx = mem_ctx; From 35e5f118a5116685b30ad3305c1c153f1af37f66 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 18:20:50 +0300 Subject: [PATCH 486/834] i965/fs: Migrate opt_combine_constants to the IR builder. 
Reviewed-by: Matt Turner --- .../dri/i965/brw_fs_combine_constants.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp index aa62031df73..0af5a915c9f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp @@ -38,6 +38,8 @@ #include "brw_fs_live_variables.h" #include "brw_cfg.h" +using namespace brw; + /* Returns whether an instruction could co-issue if its immediate source were * replaced with a GRF source. */ @@ -270,15 +272,14 @@ fs_visitor::opt_combine_constants() reg.stride = 0; for (int i = 0; i < table.len; i++) { struct imm *imm = &table.imm[i]; + /* Insert it either before the instruction that generated the immediate + * or after the last non-control flow instruction of the common ancestor. + */ + exec_node *n = (imm->inst ? imm->inst : + imm->block->last_non_control_flow_inst()->next); + const fs_builder ibld = bld.at(imm->block, n).exec_all(); - fs_inst *mov = MOV(reg, fs_reg(imm->val)); - mov->force_writemask_all = true; - if (imm->inst) { - imm->inst->insert_before(imm->block, mov); - } else { - backend_instruction *inst = imm->block->last_non_control_flow_inst(); - inst->insert_after(imm->block, mov); - } + ibld.MOV(reg, fs_reg(imm->val)); imm->reg = reg.reg; imm->subreg_offset = reg.subreg_offset; From d86c2e6e539db518dca162145c096b7440d043a7 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 18:22:17 +0300 Subject: [PATCH 487/834] i965/fs: Migrate opt_peephole_predicated_break to the IR builder. 
Reviewed-by: Matt Turner --- .../drivers/dri/i965/brw_fs_peephole_predicated_break.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp index cf3da7b1882..d92d4bbd81d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp @@ -85,9 +85,9 @@ fs_visitor::opt_peephole_predicated_break() * instruction to set the flag register. */ if (devinfo->gen == 6 && if_inst->conditional_mod) { - fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod); - if_inst->insert_before(if_block, cmp_inst); + bld.at(if_block, if_inst) + .CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1], + if_inst->conditional_mod); jump_inst->predicate = BRW_PREDICATE_NORMAL; } else { jump_inst->predicate = if_inst->predicate; From 8013b8147ae5fc652799c7ff01c2d419ebebe3db Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 4 Jun 2015 15:09:10 +0300 Subject: [PATCH 488/834] i965/fs: Take into account all instruction fields in CSE instructions_match(). Most of these fields affect the behaviour of the instruction so it could actually break the program if we CSE a pair of otherwise matching instructions with different values of these fields. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index db01f8cf7ab..3ddd17c7e3b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -152,20 +152,24 @@ static bool instructions_match(fs_inst *a, fs_inst *b, bool *negate) { return a->opcode == b->opcode && + a->force_writemask_all == b->force_writemask_all && + a->exec_size == b->exec_size && + a->force_sechalf == b->force_sechalf && a->saturate == b->saturate && a->predicate == b->predicate && a->predicate_inverse == b->predicate_inverse && a->conditional_mod == b->conditional_mod && + a->flag_subreg == b->flag_subreg && a->dst.type == b->dst.type && + a->offset == b->offset && + a->mlen == b->mlen && + a->regs_written == b->regs_written && + a->base_mrf == b->base_mrf && + a->eot == b->eot && + a->header_size == b->header_size && + a->shadow_compare == b->shadow_compare && + a->pi_noperspective == b->pi_noperspective && a->sources == b->sources && - (a->is_tex() ? (a->offset == b->offset && - a->mlen == b->mlen && - a->regs_written == b->regs_written && - a->base_mrf == b->base_mrf && - a->eot == b->eot && - a->header_size == b->header_size && - a->shadow_compare == b->shadow_compare) - : true) && operands_match(a, b, negate); } From 497d238ae72aa59fb32b21191a1a0444ca09fc10 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 4 Jun 2015 16:05:33 +0300 Subject: [PATCH 489/834] i965/vec4: Take into account all instruction fields in CSE instructions_match(). Most of these fields affect the behaviour of the instruction, but apparently we currently don't CSE the kind of instructions for which these fields could make a difference in the VEC4 back-end. That's likely to change soon though when we start using send-from-GRF for texture sampling and surface access messages. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index 9147c3cbb79..c9fe0cebf27 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -114,8 +114,16 @@ instructions_match(vec4_instruction *a, vec4_instruction *b) { return a->opcode == b->opcode && a->saturate == b->saturate && + a->predicate == b->predicate && + a->predicate_inverse == b->predicate_inverse && a->conditional_mod == b->conditional_mod && + a->flag_subreg == b->flag_subreg && a->dst.type == b->dst.type && + a->offset == b->offset && + a->mlen == b->mlen && + a->base_mrf == b->base_mrf && + a->header_size == b->header_size && + a->shadow_compare == b->shadow_compare && a->dst.writemask == b->dst.writemask && a->force_writemask_all == b->force_writemask_all && a->regs_written == b->regs_written && From e7069fbc701de68b65a876e1b4bfde4f111dd084 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 4 Jun 2015 16:09:47 +0300 Subject: [PATCH 490/834] i965/fs: Don't drop force_writemask_all and _sechalf when copying a CSE temporary. LOAD_PAYLOAD instructions need the same treatment as any other generator instructions, at least FB writes and typed surface messages will need a payload built with non-zero execution controls. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 3ddd17c7e3b..822a6a3a2f8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -206,9 +206,10 @@ create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate) copy = v->LOAD_PAYLOAD(inst->dst, payload, sources, header_size); } else { copy = v->MOV(inst->dst, src); - copy->force_writemask_all = inst->force_writemask_all; copy->src[0].negate = negate; } + copy->force_writemask_all = inst->force_writemask_all; + copy->force_sechalf = inst->force_sechalf; assert(copy->regs_written == written); return copy; From 74c2458ecf492f2dd344b4f6114b13a376f90657 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 4 Jun 2015 16:13:35 +0300 Subject: [PATCH 491/834] i965/fs: Migrate opt_cse to the IR builder. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 27 +++++++++++------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 822a6a3a2f8..70f0217b93d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -32,6 +32,8 @@ * 13.1 (p378). */ +using namespace brw; + namespace { struct aeb_entry : public exec_node { /** The instruction that generates the expression value. 
*/ @@ -173,11 +175,13 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) operands_match(a, b, negate); } -static fs_inst * -create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate) +static void +create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) { int written = inst->regs_written; int dst_width = inst->dst.width / 8; + const fs_builder ubld = bld.group(inst->exec_size, inst->force_sechalf) + .exec_all(inst->force_writemask_all); fs_inst *copy; if (written > dst_width) { @@ -193,7 +197,7 @@ create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate) } assert(src.file == GRF); - payload = ralloc_array(v->mem_ctx, fs_reg, sources); + payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources); for (int i = 0; i < header_size; i++) { payload[i] = src; payload[i].width = 8; @@ -203,16 +207,12 @@ create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate) payload[i] = src; src = offset(src, 1); } - copy = v->LOAD_PAYLOAD(inst->dst, payload, sources, header_size); + copy = ubld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); } else { - copy = v->MOV(inst->dst, src); + copy = ubld.MOV(inst->dst, src); copy->src[0].negate = negate; } - copy->force_writemask_all = inst->force_writemask_all; - copy->force_sechalf = inst->force_sechalf; assert(copy->regs_written == written); - - return copy; } bool @@ -266,9 +266,8 @@ fs_visitor::opt_cse_local(bblock_t *block) entry->generator->dst.type, entry->generator->dst.width); - fs_inst *copy = create_copy_instr(this, entry->generator, - entry->tmp, false); - entry->generator->insert_after(block, copy); + create_copy_instr(bld.at(block, entry->generator->next), + entry->generator, entry->tmp, false); entry->generator->dst = entry->tmp; } @@ -279,9 +278,7 @@ fs_visitor::opt_cse_local(bblock_t *block) assert(inst->dst.width == entry->generator->dst.width); assert(inst->dst.type == entry->tmp.type); - fs_inst *copy = create_copy_instr(this, inst, - 
entry->tmp, negate); - inst->insert_before(block, copy); + create_copy_instr(bld.at(block, inst), inst, entry->tmp, negate); } /* Set our iterator so that next time through the loop inst->next From 78f7c9edeb21ec4e7a4f96aa12b51cecc40e9688 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 19:33:44 +0300 Subject: [PATCH 492/834] i965/fs: Create and emit instructions in one step in opt_peephole_sel. This simplifies opt_peephole_sel() slightly by emitting the SEL instructions immediately after they are created, which makes the sel_inst and mov_imm_inst arrays unnecessary and will make it possible to get rid of the explicit inserts when the pass is migrated to the IR builder. Reviewed-by: Matt Turner --- .../drivers/dri/i965/brw_fs_sel_peephole.cpp | 46 +++++++++---------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp index 52aa5590c2e..635c91b9d46 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp @@ -153,9 +153,6 @@ fs_visitor::opt_peephole_sel() if (movs == 0) continue; - fs_inst *sel_inst[MAX_MOVS] = { NULL }; - fs_inst *mov_imm_inst[MAX_MOVS] = { NULL }; - enum brw_predicate predicate; bool predicate_inverse; if (devinfo->gen == 6 && if_inst->conditional_mod) { @@ -188,25 +185,6 @@ fs_visitor::opt_peephole_sel() movs = i; break; } - - if (then_mov[i]->src[0].equals(else_mov[i]->src[0])) { - sel_inst[i] = MOV(then_mov[i]->dst, then_mov[i]->src[0]); - } else { - /* Only the last source register can be a constant, so if the MOV - * in the "then" clause uses a constant, we need to put it in a - * temporary.
- */ - fs_reg src0(then_mov[i]->src[0]); - if (src0.file == IMM) { - src0 = vgrf(glsl_type::float_type); - src0.type = then_mov[i]->src[0].type; - mov_imm_inst[i] = MOV(src0, then_mov[i]->src[0]); - } - - sel_inst[i] = SEL(then_mov[i]->dst, src0, else_mov[i]->src[0]); - sel_inst[i]->predicate = predicate; - sel_inst[i]->predicate_inverse = predicate_inverse; - } } if (movs == 0) @@ -220,9 +198,27 @@ fs_visitor::opt_peephole_sel() } for (int i = 0; i < movs; i++) { - if (mov_imm_inst[i]) - if_inst->insert_before(block, mov_imm_inst[i]); - if_inst->insert_before(block, sel_inst[i]); + if (then_mov[i]->src[0].equals(else_mov[i]->src[0])) { + fs_inst *inst = MOV(then_mov[i]->dst, then_mov[i]->src[0]); + if_inst->insert_before(block, inst); + } else { + /* Only the last source register can be a constant, so if the MOV + * in the "then" clause uses a constant, we need to put it in a + * temporary. + */ + fs_reg src0(then_mov[i]->src[0]); + if (src0.file == IMM) { + src0 = vgrf(glsl_type::float_type); + src0.type = then_mov[i]->src[0].type; + fs_inst *inst = MOV(src0, then_mov[i]->src[0]); + if_inst->insert_before(block, inst); + } + + fs_inst *inst = SEL(then_mov[i]->dst, src0, else_mov[i]->src[0]); + inst->predicate = predicate; + inst->predicate_inverse = predicate_inverse; + if_inst->insert_before(block, inst); + } then_mov[i]->remove(then_block); else_mov[i]->remove(else_block); From a800ec04ad84abeb6243897a276facc4ef6cac82 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 19:51:47 +0300 Subject: [PATCH 493/834] i965/fs: Migrate opt_peephole_sel to the IR builder. 
Reviewed-by: Matt Turner --- .../drivers/dri/i965/brw_fs_sel_peephole.cpp | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp index 635c91b9d46..8660ec08b8f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp @@ -37,6 +37,8 @@ */ #define MAX_MOVS 8 /**< The maximum number of MOVs to attempt to match. */ +using namespace brw; + /** * Scans forwards from an IF counting consecutive MOV instructions in the * "then" and "else" blocks of the if statement. @@ -190,17 +192,16 @@ fs_visitor::opt_peephole_sel() if (movs == 0) continue; + const fs_builder ibld = bld.at(block, if_inst); + /* Emit a CMP if our IF used the embedded comparison */ - if (devinfo->gen == 6 && if_inst->conditional_mod) { - fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod); - if_inst->insert_before(block, cmp_inst); - } + if (devinfo->gen == 6 && if_inst->conditional_mod) + ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1], + if_inst->conditional_mod); for (int i = 0; i < movs; i++) { if (then_mov[i]->src[0].equals(else_mov[i]->src[0])) { - fs_inst *inst = MOV(then_mov[i]->dst, then_mov[i]->src[0]); - if_inst->insert_before(block, inst); + ibld.MOV(then_mov[i]->dst, then_mov[i]->src[0]); } else { /* Only the last source register can be a constant, so if the MOV * in the "then" clause uses a constant, we need to put it in a @@ -210,14 +211,12 @@ fs_visitor::opt_peephole_sel() if (src0.file == IMM) { src0 = vgrf(glsl_type::float_type); src0.type = then_mov[i]->src[0].type; - fs_inst *inst = MOV(src0, then_mov[i]->src[0]); - if_inst->insert_before(block, inst); + ibld.MOV(src0, then_mov[i]->src[0]); } - fs_inst *inst = SEL(then_mov[i]->dst, src0, else_mov[i]->src[0]); - inst->predicate = predicate; - inst->predicate_inverse = predicate_inverse; 
- if_inst->insert_before(block, inst); + set_predicate_inv(predicate, predicate_inverse, + ibld.SEL(then_mov[i]->dst, src0, + else_mov[i]->src[0])); } then_mov[i]->remove(then_block); From 6114ba4dccfdb8f7c657feeed8f8c9b69debba91 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 20:46:31 +0300 Subject: [PATCH 494/834] i965/fs: Migrate opt_sampler_eot to the IR builder. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 9fca9914f2c..32798563287 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2582,7 +2582,7 @@ fs_visitor::opt_sampler_eot() tex_inst->offset |= fb_write->target << 24; tex_inst->eot = true; - tex_inst->dst = reg_null_ud; + tex_inst->dst = bld.null_reg_ud(); fb_write->remove(cfg->blocks[cfg->num_blocks - 1]); /* If a header is present, marking the eot is sufficient. Otherwise, we need @@ -2594,7 +2594,8 @@ fs_visitor::opt_sampler_eot() if (tex_inst->header_size != 0) return true; - fs_reg send_header = vgrf(load_payload->sources + 1); + fs_reg send_header = bld.vgrf(BRW_REGISTER_TYPE_F, + load_payload->sources + 1); fs_reg *new_sources = ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1); From 3e6ac0bcedfe1b5d092d6ee19323c3ef87b99dba Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:05:45 +0300 Subject: [PATCH 495/834] i965/fs: Migrate try_replace_with_sel to the IR builder. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 69c04524fea..dbc61e5e260 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1132,21 +1132,21 @@ fs_visitor::try_replace_with_sel() if (src0.file == IMM) { src0 = vgrf(glsl_type::float_type); src0.type = then_mov->src[0].type; - emit(MOV(src0, then_mov->src[0])); + bld.MOV(src0, then_mov->src[0]); } - fs_inst *sel; if (if_inst->conditional_mod) { /* Sandybridge-specific IF with embedded comparison */ - emit(CMP(reg_null_d, if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod)); - sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]); - sel->predicate = BRW_PREDICATE_NORMAL; + bld.CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1], + if_inst->conditional_mod); + set_predicate(BRW_PREDICATE_NORMAL, + bld.emit(BRW_OPCODE_SEL, then_mov->dst, + src0, else_mov->src[0])); } else { /* Separate CMP and IF instructions */ - sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]); - sel->predicate = if_inst->predicate; - sel->predicate_inverse = if_inst->predicate_inverse; + set_predicate_inv(if_inst->predicate, if_inst->predicate_inverse, + bld.emit(BRW_OPCODE_SEL, then_mov->dst, + src0, else_mov->src[0])); } return true; From 8f8c6b7bdab1fc25fe8277705ebb1818ab220821 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 19:05:54 +0300 Subject: [PATCH 496/834] i965/fs: Migrate register spills and fills to the IR builder. 
Yes, it's incorrect to use the 0-th channel enable group unconditionally without considering the execution and regioning controls of the instruction that uses the spilled value, but it matches the previous behaviour exactly, the builder just makes the preexisting problem more obvious because emitting an instruction of non-native SIMD width without having called .group() or .exec_all() explicitly would have led to an assertion failure. I'll fix the problem in a follow-up series, as the solution is going to be non-trivial. Reviewed-by: Matt Turner --- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 582d0993f1c..3faf49ab8aa 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -30,6 +30,8 @@ #include "glsl/glsl_types.h" #include "glsl/ir_optimization.h" +using namespace brw; + static void assign_reg(unsigned *reg_hw_locations, fs_reg *reg) { @@ -696,25 +698,24 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst, dst.width = 16; } + const fs_builder ibld = bld.annotate(inst->annotation, inst->ir) + .group(reg_size * 8, 0) + .at(block, inst); + for (int i = 0; i < count / reg_size; i++) { /* The gen7 descriptor-based offset is 12 bits of HWORD units. */ bool gen7_read = devinfo->gen >= 7 && spill_offset < (1 << 12) * REG_SIZE; - - fs_inst *unspill_inst = - new(mem_ctx) fs_inst(gen7_read ? - SHADER_OPCODE_GEN7_SCRATCH_READ : - SHADER_OPCODE_GEN4_SCRATCH_READ, - dst); + fs_inst *unspill_inst = ibld.emit(gen7_read ? 
+ SHADER_OPCODE_GEN7_SCRATCH_READ : + SHADER_OPCODE_GEN4_SCRATCH_READ, + dst); unspill_inst->offset = spill_offset; - unspill_inst->ir = inst->ir; - unspill_inst->annotation = inst->annotation; unspill_inst->regs_written = reg_size; if (!gen7_read) { unspill_inst->base_mrf = 14; unspill_inst->mlen = 1; /* header contains offset */ } - inst->insert_before(block, unspill_inst); dst.reg_offset += reg_size; spill_offset += reg_size * REG_SIZE; @@ -732,17 +733,17 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src, reg_size = 2; } + const fs_builder ibld = bld.annotate(inst->annotation, inst->ir) + .group(reg_size * 8, 0) + .at(block, inst->next); + for (int i = 0; i < count / reg_size; i++) { fs_inst *spill_inst = - new(mem_ctx) fs_inst(SHADER_OPCODE_GEN4_SCRATCH_WRITE, - reg_size * 8, reg_null_f, src); + ibld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE, bld.null_reg_f(), src); src.reg_offset += reg_size; spill_inst->offset = spill_offset + i * reg_size * REG_SIZE; - spill_inst->ir = inst->ir; - spill_inst->annotation = inst->annotation; spill_inst->mlen = 1 + reg_size; /* header, value */ spill_inst->base_mrf = spill_base_mrf; - inst->insert_after(block, spill_inst); } } From efa60e49f2e5dd56f1c81487e9aad9f89136d8b4 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 20:36:47 +0300 Subject: [PATCH 497/834] i965/fs: Migrate lower_load_payload to the IR builder. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 34 +++++++++------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 32798563287..ee7a5d50c06 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -49,6 +49,8 @@ #include "glsl/glsl_types.h" #include "program/sampler.h" +using namespace brw; + void fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, const fs_reg *src, unsigned sources) @@ -3321,6 +3323,9 @@ fs_visitor::lower_load_payload() assert(inst->dst.file == MRF || inst->dst.file == GRF); assert(inst->saturate == false); + const fs_builder ibld = bld.group(inst->exec_size, inst->force_sechalf) + .exec_all(inst->force_writemask_all) + .at(block, inst); fs_reg dst = inst->dst; /* Get rid of COMPR4. We'll add it back in if we need it */ @@ -3333,9 +3338,7 @@ fs_visitor::lower_load_payload() fs_reg mov_dst = retype(dst, BRW_REGISTER_TYPE_UD); fs_reg mov_src = retype(inst->src[i], BRW_REGISTER_TYPE_UD); mov_src.width = 8; - fs_inst *mov = MOV(mov_dst, mov_src); - mov->force_writemask_all = true; - inst->insert_before(block, mov); + ibld.exec_all().MOV(mov_dst, mov_src); } dst = offset(dst, 1); } @@ -3366,23 +3369,13 @@ fs_visitor::lower_load_payload() if (devinfo->has_compr4) { fs_reg compr4_dst = retype(dst, inst->src[i].type); compr4_dst.reg |= BRW_MRF_COMPR4; - - fs_inst *mov = MOV(compr4_dst, inst->src[i]); - mov->force_writemask_all = inst->force_writemask_all; - inst->insert_before(block, mov); + ibld.MOV(compr4_dst, inst->src[i]); } else { /* Platform doesn't have COMPR4. 
We have to fake it */ fs_reg mov_dst = retype(dst, inst->src[i].type); mov_dst.width = 8; - - fs_inst *mov = MOV(mov_dst, half(inst->src[i], 0)); - mov->force_writemask_all = inst->force_writemask_all; - inst->insert_before(block, mov); - - mov = MOV(offset(mov_dst, 4), half(inst->src[i], 1)); - mov->force_writemask_all = inst->force_writemask_all; - mov->force_sechalf = true; - inst->insert_before(block, mov); + ibld.half(0).MOV(mov_dst, half(inst->src[i], 0)); + ibld.half(1).MOV(offset(mov_dst, 4), half(inst->src[i], 1)); } } @@ -3405,13 +3398,8 @@ fs_visitor::lower_load_payload() } for (uint8_t i = inst->header_size; i < inst->sources; i++) { - if (inst->src[i].file != BAD_FILE) { - fs_inst *mov = MOV(retype(dst, inst->src[i].type), - inst->src[i]); - mov->force_writemask_all = inst->force_writemask_all; - mov->force_sechalf = inst->force_sechalf; - inst->insert_before(block, mov); - } + if (inst->src[i].file != BAD_FILE) + ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]); dst = offset(dst, 1); } From 4af4cfba9ee1014baa4a777660fc9d53d57e4c82 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 20:49:32 +0300 Subject: [PATCH 498/834] i965/fs: Migrate lower_integer_multiplication to the IR builder. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index ee7a5d50c06..e1b202b0637 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3431,7 +3431,7 @@ fs_visitor::lower_integer_multiplication() inst->dst.type != BRW_REGISTER_TYPE_UD)) continue; -#define insert(instr) inst->insert_before(block, instr) + const fs_builder ibld = bld.at(block, inst); /* The MUL instruction isn't commutative. 
On Gen <= 6, only the low * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of @@ -3445,10 +3445,10 @@ fs_visitor::lower_integer_multiplication() if (devinfo->gen < 7) { fs_reg imm(GRF, alloc.allocate(dispatch_width / 8), inst->dst.type, dispatch_width); - insert(MOV(imm, inst->src[1])); - insert(MUL(inst->dst, imm, inst->src[0])); + ibld.MOV(imm, inst->src[1]); + ibld.MUL(inst->dst, imm, inst->src[0]); } else { - insert(MUL(inst->dst, inst->src[0], inst->src[1])); + ibld.MUL(inst->dst, inst->src[0], inst->src[1]); } } else { /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot @@ -3519,8 +3519,8 @@ fs_visitor::lower_integer_multiplication() src1_1_w.stride = 2; src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); } - insert(MUL(low, inst->src[0], src1_0_w)); - insert(MUL(high, inst->src[0], src1_1_w)); + ibld.MUL(low, inst->src[0], src1_0_w); + ibld.MUL(high, inst->src[0], src1_1_w); } else { fs_reg src0_0_w = inst->src[0]; fs_reg src0_1_w = inst->src[0]; @@ -3532,8 +3532,8 @@ fs_visitor::lower_integer_multiplication() src0_1_w.stride = 2; src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); - insert(MUL(low, src0_0_w, inst->src[1])); - insert(MUL(high, src0_1_w, inst->src[1])); + ibld.MUL(low, src0_0_w, inst->src[1]); + ibld.MUL(high, src0_1_w, inst->src[1]); } fs_reg dst = inst->dst; @@ -3548,16 +3548,14 @@ fs_visitor::lower_integer_multiplication() low.subreg_offset = 2; low.stride = 2; - insert(ADD(dst, low, high)); + ibld.ADD(dst, low, high); if (inst->conditional_mod) { fs_reg null(retype(brw_null_reg(), inst->dst.type)); - fs_inst *mov = MOV(null, inst->dst); - mov->conditional_mod = inst->conditional_mod; - insert(mov); + set_condmod(inst->conditional_mod, + ibld.MOV(null, inst->dst)); } } -#undef insert inst->remove(block); progress = true; From 8f626c14989f005599f7841b89144d2bf58b5704 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 22:22:10 +0300 Subject: [PATCH 499/834] i965/fs: Migrate Gen4 send 
dependency workarounds to the IR builder. v2: Change brw_null_reg() to bld.null_reg_f(). Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 26 ++++++++++---------------- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index e1b202b0637..6e3d5cb6ebc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -418,20 +418,16 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, * A helper for MOV generation for fixing up broken hardware SEND dependency * handling. */ -fs_inst * -fs_visitor::DEP_RESOLVE_MOV(int grf) +void +fs_visitor::DEP_RESOLVE_MOV(const fs_builder &bld, int grf) { - fs_inst *inst = MOV(brw_null_reg(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F)); - - inst->ir = NULL; - inst->annotation = "send dependency resolve"; - /* The caller always wants uncompressed to emit the minimal extra * dependencies, and to avoid having to deal with aligning its regs to 2. 
*/ - inst->exec_size = 8; + const fs_builder ubld = bld.annotate("send dependency resolve") + .half(0); - return inst; + ubld.MOV(ubld.null_reg_f(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F)); } bool @@ -3117,9 +3113,8 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, */ if (block->start() == scan_inst) { for (int i = 0; i < write_len; i++) { - if (needs_dep[i]) { - inst->insert_before(block, DEP_RESOLVE_MOV(first_write_grf + i)); - } + if (needs_dep[i]) + DEP_RESOLVE_MOV(bld.at(block, inst), first_write_grf + i); } return; } @@ -3135,7 +3130,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, if (reg >= first_write_grf && reg < first_write_grf + write_len && needs_dep[reg - first_write_grf]) { - inst->insert_before(block, DEP_RESOLVE_MOV(reg)); + DEP_RESOLVE_MOV(bld.at(block, inst), reg); needs_dep[reg - first_write_grf] = false; if (scan_inst->exec_size == 16) needs_dep[reg - first_write_grf + 1] = false; @@ -3182,8 +3177,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins if (block->end() == scan_inst) { for (int i = 0; i < write_len; i++) { if (needs_dep[i]) - scan_inst->insert_before(block, - DEP_RESOLVE_MOV(first_write_grf + i)); + DEP_RESOLVE_MOV(bld.at(block, scan_inst), first_write_grf + i); } return; } @@ -3198,7 +3192,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins scan_inst->dst.reg >= first_write_grf && scan_inst->dst.reg < first_write_grf + write_len && needs_dep[scan_inst->dst.reg - first_write_grf]) { - scan_inst->insert_before(block, DEP_RESOLVE_MOV(scan_inst->dst.reg)); + DEP_RESOLVE_MOV(bld.at(block, scan_inst), scan_inst->dst.reg); needs_dep[scan_inst->dst.reg - first_write_grf] = false; } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index bdda9d0558b..5ad137b2858 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -131,7 +131,6 @@ public: enum 
brw_conditional_mod condition); fs_inst *LRP(const fs_reg &dst, const fs_reg &a, const fs_reg &y, const fs_reg &x); - fs_inst *DEP_RESOLVE_MOV(int grf); fs_inst *BFREV(const fs_reg &dst, const fs_reg &value); fs_inst *BFE(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset, const fs_reg &value); @@ -159,6 +158,7 @@ public: const fs_reg &surf_index, const fs_reg &varying_offset, uint32_t const_offset); + void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf); bool run_fs(); bool run_vs(); From 546839ef639bf871feaa62ab7d811f2fc783bdcd Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 22:22:39 +0300 Subject: [PATCH 500/834] i965/fs: Migrate pull constant loads to the IR builder. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 39 +++++++++--------------- src/mesa/drivers/dri/i965/brw_fs.h | 9 +++--- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 4 +-- 3 files changed, 21 insertions(+), 31 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 6e3d5cb6ebc..c4d3f575de3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -351,15 +351,13 @@ fs_visitor::LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources, return inst; } -exec_list -fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, +void +fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, + const fs_reg &dst, const fs_reg &surf_index, const fs_reg &varying_offset, uint32_t const_offset) { - exec_list instructions; - fs_inst *inst; - /* We have our constant surface use a pitch of 4 bytes, so our index can * be any component of a vector, and then we load 4 contiguous * components starting from that. @@ -372,8 +370,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, * the redundant ones. 
*/ fs_reg vec4_offset = vgrf(glsl_type::int_type); - instructions.push_tail(ADD(vec4_offset, - varying_offset, fs_reg(const_offset & ~3))); + bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3)); int scale = 1; if (devinfo->gen == 4 && dst.width == 8) { @@ -395,9 +392,8 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, int regs_written = 4 * (dst.width / 8) * scale; fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type, dst.width); - inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset); + fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset); inst->regs_written = regs_written; - instructions.push_tail(inst); if (devinfo->gen < 7) { inst->base_mrf = 13; @@ -408,10 +404,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, inst->mlen = 1 + dispatch_width / 8; } - fs_reg result = offset(vec4_result, (const_offset & 3) * scale); - instructions.push_tail(MOV(dst, result)); - - return instructions; + bld.MOV(dst, offset(vec4_result, (const_offset & 3) * scale)); } /** @@ -2211,26 +2204,22 @@ fs_visitor::demote_pull_constants() continue; /* Set up the annotation tracking for new generated instructions. */ - base_ir = inst->ir; - current_annotation = inst->annotation; - + const fs_builder ibld = bld.annotate(inst->annotation, inst->ir) + .at(block, inst); fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start); fs_reg dst = vgrf(glsl_type::float_type); /* Generate a pull load into dst. 
*/ if (inst->src[i].reladdr) { - exec_list list = VARYING_PULL_CONSTANT_LOAD(dst, - surf_index, - *inst->src[i].reladdr, - pull_index); - inst->insert_before(block, &list); + VARYING_PULL_CONSTANT_LOAD(ibld, dst, + surf_index, + *inst->src[i].reladdr, + pull_index); inst->src[i].reladdr = NULL; } else { fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15); - fs_inst *pull = - new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8, - dst, surf_index, offset); - inst->insert_before(block, pull); + ibld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + dst, surf_index, offset); inst->src[i].set_smear(pull_index & 3); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 5ad137b2858..cf568368541 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -154,10 +154,11 @@ public: fs_inst *LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources, int header_size); - exec_list VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, - const fs_reg &surf_index, - const fs_reg &varying_offset, - uint32_t const_offset); + void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld, + const fs_reg &dst, + const fs_reg &surf_index, + const fs_reg &varying_offset, + uint32_t const_offset); void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf); bool run_fs(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 5d3501c60ba..133c99ce992 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1397,8 +1397,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) unsigned vec4_offset = instr->const_index[0] / 4; for (int i = 0; i < instr->num_components; i++) - emit(VARYING_PULL_CONSTANT_LOAD(offset(dest, i), surf_index, - base_offset, vec4_offset + i)); + VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, i), surf_index, + base_offset, vec4_offset + i); } else { fs_reg packed_consts = vgrf(glsl_type::float_type); 
packed_consts.type = dest.type; From db83d9d2d0f2743cf64ece731c753f21aba87da6 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:05:28 +0300 Subject: [PATCH 501/834] i965/fs: Migrate texturing implementation to the IR builder. v2: Remove tabs from modified lines. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 159 +++++++++---------- 1 file changed, 78 insertions(+), 81 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index dbc61e5e260..0b56579c344 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -132,7 +132,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, if (shadow_c.file != BAD_FILE) { for (int i = 0; i < coord_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate = offset(coordinate, 1); } @@ -140,7 +140,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, * the unused slots must be zeroed. */ for (int i = coord_components; i < 3; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)); } mlen += 3; @@ -148,25 +148,25 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, /* There's no plain shadow compare message, so we use shadow * compare with a bias of 0.0. 
*/ - emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); mlen++; } else if (op == ir_txb || op == ir_txl) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); + bld.MOV(fs_reg(MRF, base_mrf + mlen), lod); mlen++; } else { unreachable("Should not get here."); } - emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c)); + bld.MOV(fs_reg(MRF, base_mrf + mlen), shadow_c); mlen++; } else if (op == ir_tex) { for (int i = 0; i < coord_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate = offset(coordinate, 1); } /* zero the others. */ for (int i = coord_components; i<3; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)); } /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ mlen += 3; @@ -174,7 +174,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, fs_reg &dPdx = lod; for (int i = 0; i < coord_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate = offset(coordinate, 1); } /* the slots for u and v are always present, but r is optional */ @@ -195,20 +195,20 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, * m5 m6 m7 m8 m9 m10 */ for (int i = 0; i < grad_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx)); + bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdx); dPdx = offset(dPdx, 1); } mlen += MAX2(grad_components, 2); for (int i = 0; i < grad_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy)); + bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdy); dPdy = offset(dPdy, 1); } mlen += MAX2(grad_components, 2); } else if (op == ir_txs) { /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. 
*/ simd16 = true; - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod)); + bld.MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod); mlen += 2; } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod @@ -218,8 +218,8 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, assert(op == ir_txb || op == ir_txl || op == ir_txf); for (int i = 0; i < coord_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), - coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), + coordinate); coordinate = offset(coordinate, 1); } @@ -227,13 +227,13 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, * be necessary for TXF (ld), but seems wise to do for all messages. */ for (int i = coord_components; i < 3; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)); } /* lod/bias appears after u/v/r. */ mlen += 6; - emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod)); + bld.MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod); mlen++; /* The unused upper half. */ @@ -261,7 +261,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, unreachable("not reached"); } - fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); + fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); inst->base_mrf = base_mrf; inst->mlen = mlen; inst->header_size = 1; @@ -269,7 +269,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, if (simd16) { for (int i = 0; i < 4; i++) { - emit(MOV(orig_dst, dst)); + bld.MOV(orig_dst, dst); orig_dst = offset(orig_dst, 1); dst = offset(dst, 2); } @@ -295,7 +295,7 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, /* Copy the coordinates. 
*/ for (int i = 0; i < vector_elements; i++) { - emit(MOV(retype(offset(message, i), coordinate.type), coordinate)); + bld.MOV(retype(offset(message, i), coordinate.type), coordinate); coordinate = offset(coordinate, 1); } @@ -304,20 +304,20 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, /* Messages other than sample and ld require all three components */ if (has_lod || shadow_c.file != BAD_FILE) { for (int i = vector_elements; i < 3; i++) { - emit(MOV(offset(message, i), fs_reg(0.0f))); + bld.MOV(offset(message, i), fs_reg(0.0f)); } } if (has_lod) { fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ? BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F); - emit(MOV(msg_lod, lod)); + bld.MOV(msg_lod, lod); msg_end = offset(msg_lod, 1); } if (shadow_c.file != BAD_FILE) { fs_reg msg_ref = offset(message, 3 + has_lod); - emit(MOV(msg_ref, shadow_c)); + bld.MOV(msg_ref, shadow_c); msg_end = offset(msg_ref, 1); } @@ -332,7 +332,7 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, default: unreachable("not reached"); } - fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); + fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); inst->base_mrf = message.reg - 1; inst->mlen = msg_end.reg - inst->base_mrf; inst->header_size = 1; @@ -372,7 +372,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, } for (int i = 0; i < vector_elements; i++) { - emit(MOV(retype(offset(msg_coords, i), coordinate.type), coordinate)); + bld.MOV(retype(offset(msg_coords, i), coordinate.type), coordinate); coordinate = offset(coordinate, 1); } fs_reg msg_end = offset(msg_coords, vector_elements); @@ -380,7 +380,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, if (shadow_c.file != BAD_FILE) { fs_reg msg_shadow = msg_lod; - emit(MOV(msg_shadow, shadow_c)); + bld.MOV(msg_shadow, shadow_c); msg_lod = offset(msg_shadow, 1); msg_end = msg_lod; } @@ -391,13 +391,13 @@ 
fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, opcode = SHADER_OPCODE_TEX; break; case ir_txb: - emit(MOV(msg_lod, lod)); + bld.MOV(msg_lod, lod); msg_end = offset(msg_lod, 1); opcode = FS_OPCODE_TXB; break; case ir_txl: - emit(MOV(msg_lod, lod)); + bld.MOV(msg_lod, lod); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXL; @@ -414,11 +414,11 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, */ msg_end = msg_lod; for (int i = 0; i < grad_components; i++) { - emit(MOV(msg_end, lod)); + bld.MOV(msg_end, lod); lod = offset(lod, 1); msg_end = offset(msg_end, 1); - emit(MOV(msg_end, lod2)); + bld.MOV(msg_end, lod2); lod2 = offset(lod2, 1); msg_end = offset(msg_end, 1); } @@ -428,21 +428,21 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, } case ir_txs: msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD); - emit(MOV(msg_lod, lod)); + bld.MOV(msg_lod, lod); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXS; break; case ir_query_levels: msg_lod = msg_end; - emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u))); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXS; break; case ir_txf: msg_lod = offset(msg_coords, 3); - emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod)); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXF; @@ -450,9 +450,9 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, case ir_txf_ms: msg_lod = offset(msg_coords, 3); /* lod */ - emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u))); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); /* sample index */ - emit(MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index)); + bld.MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index); msg_end = offset(msg_lod, 2); opcode = SHADER_OPCODE_TXF_CMS; @@ -467,7 +467,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode 
op, fs_reg dst, unreachable("not reached"); } - fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); + fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); inst->base_mrf = message.reg; inst->mlen = msg_end.reg - message.reg; inst->header_size = header_size; @@ -525,7 +525,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, } if (shadow_c.file != BAD_FILE) { - emit(MOV(sources[length], shadow_c)); + bld.MOV(sources[length], shadow_c); length++; } @@ -548,11 +548,11 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, case ir_lod: break; case ir_txb: - emit(MOV(sources[length], lod)); + bld.MOV(sources[length], lod); length++; break; case ir_txl: - emit(MOV(sources[length], lod)); + bld.MOV(sources[length], lod); length++; break; case ir_txd: { @@ -562,7 +562,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z */ for (int i = 0; i < coord_components; i++) { - emit(MOV(sources[length], coordinate)); + bld.MOV(sources[length], coordinate); coordinate = offset(coordinate, 1); length++; @@ -570,11 +570,11 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, * only derivatives for (u, v, r). 
*/ if (i < grad_components) { - emit(MOV(sources[length], lod)); + bld.MOV(sources[length], lod); lod = offset(lod, 1); length++; - emit(MOV(sources[length], lod2)); + bld.MOV(sources[length], lod2); lod2 = offset(lod2, 1); length++; } @@ -584,11 +584,11 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, break; } case ir_txs: - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod); length++; break; case ir_query_levels: - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u))); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u)); length++; break; case ir_txf: @@ -596,23 +596,23 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, * On Gen9 they are u, v, lod, r */ - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); length++; if (devinfo->gen >= 9) { if (coord_components >= 2) { - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); } length++; } - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod); length++; for (int i = devinfo->gen >= 9 ? 
2 : 1; i < coord_components; i++) { - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); length++; } @@ -620,18 +620,18 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, coordinate_done = true; break; case ir_txf_ms: - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index); length++; /* data from the multisample control surface */ - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs); length++; /* there is no offsetting for this message; just copy in the integer * texture coordinates */ for (int i = 0; i < coord_components; i++) { - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); length++; } @@ -645,19 +645,19 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, /* More crazy intermixing */ for (int i = 0; i < 2; i++) { /* u, v */ - emit(MOV(sources[length], coordinate)); + bld.MOV(sources[length], coordinate); coordinate = offset(coordinate, 1); length++; } for (int i = 0; i < 2; i++) { /* offu, offv */ - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value); offset_value = offset(offset_value, 1); length++; } if (coord_components == 3) { /* r if present */ - emit(MOV(sources[length], coordinate)); + bld.MOV(sources[length], coordinate); coordinate = offset(coordinate, 1); length++; } @@ -670,7 +670,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, /* Set up the coordinate (except for cases where it was done above) */ if (!coordinate_done) { for (int i = 0; i < coord_components; i++) { - emit(MOV(sources[length], coordinate)); 
+ bld.MOV(sources[length], coordinate); coordinate = offset(coordinate, 1); length++; } @@ -684,7 +684,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_F, dispatch_width); - emit(LOAD_PAYLOAD(src_payload, sources, length, header_size)); + bld.LOAD_PAYLOAD(src_payload, sources, length, header_size); /* Generate the SEND */ enum opcode opcode; @@ -707,7 +707,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, default: unreachable("not reached"); } - fs_inst *inst = emit(opcode, dst, src_payload, sampler); + fs_inst *inst = bld.emit(opcode, dst, src_payload, sampler); inst->base_mrf = -1; inst->mlen = mlen; inst->header_size = header_size; @@ -725,7 +725,6 @@ fs_reg fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, bool is_rect, uint32_t sampler, int texunit) { - fs_inst *inst = NULL; bool needs_gl_clamp = true; fs_reg scale_x, scale_y; @@ -784,10 +783,10 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, fs_reg src = coordinate; coordinate = dst; - emit(MUL(dst, src, scale_x)); + bld.MUL(dst, src, scale_x); dst = offset(dst, 1); src = offset(src, 1); - emit(MUL(dst, src, scale_y)); + bld.MUL(dst, src, scale_y); } else if (is_rect) { /* On gen6+, the sampler handles the rectangle coordinates * natively, without needing rescaling. But that means we have @@ -801,8 +800,8 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, fs_reg chan = coordinate; chan = offset(chan, i); - inst = emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f)); - inst->conditional_mod = BRW_CONDITIONAL_GE; + set_condmod(BRW_CONDITIONAL_GE, + bld.emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f))); /* Our parameter comes in as 1.0/width or 1.0/height, * because that's what people normally want for doing @@ -811,11 +810,11 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, * parameter type, so just invert back. 
*/ fs_reg limit = vgrf(glsl_type::float_type); - emit(MOV(limit, i == 0 ? scale_x : scale_y)); - emit(SHADER_OPCODE_RCP, limit, limit); + bld.MOV(limit, i == 0 ? scale_x : scale_y); + bld.emit(SHADER_OPCODE_RCP, limit, limit); - inst = emit(BRW_OPCODE_SEL, chan, chan, limit); - inst->conditional_mod = BRW_CONDITIONAL_L; + set_condmod(BRW_CONDITIONAL_L, + bld.emit(BRW_OPCODE_SEL, chan, chan, limit)); } } } @@ -825,9 +824,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, if (key_tex->gl_clamp_mask[i] & (1 << sampler)) { fs_reg chan = coordinate; chan = offset(chan, i); - - fs_inst *inst = emit(MOV(chan, chan)); - inst->saturate = true; + set_saturate(true, bld.MOV(chan, chan)); } } } @@ -847,13 +844,13 @@ fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler) /* parameters are: u, v, r; missing parameters are treated as zero */ for (int i = 0; i < components; i++) { sources[i] = vgrf(glsl_type::float_type); - emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); } - emit(LOAD_PAYLOAD(payload, sources, components, 0)); + bld.LOAD_PAYLOAD(payload, sources, components, 0); - fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler); + fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler); inst->base_mrf = -1; inst->mlen = components * reg_width; inst->header_size = 0; @@ -893,7 +890,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, this->result = res; for (int i=0; i<4; i++) { - emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f))); + bld.MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 
0.0f : 1.0f)); res = offset(res, 1); } return; @@ -950,7 +947,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, if (op == ir_txs && is_cube_array) { fs_reg depth = offset(dst, 2); fs_reg fixed_depth = vgrf(glsl_type::int_type); - emit_math(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6)); + bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6)); fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written); int components = inst->regs_written / (dst.width / 8); @@ -961,7 +958,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, fixed_payload[i] = offset(dst, i); } } - emit(LOAD_PAYLOAD(dst, fixed_payload, components, 0)); + bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0); } swizzle_result(op, dest_type->vector_elements, dst, sampler); @@ -981,16 +978,16 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst) for (int i = 0; i < 4; i++) { fs_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F); /* Convert from UNORM to UINT */ - emit(MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1)))); - emit(MOV(dst, dst_f)); + bld.MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1))); + bld.MOV(dst, dst_f); if (wa & WA_SIGN) { /* Reinterpret the UINT value as a signed INT value by * shifting the sign bit into place, then shifting back * preserving sign. 
*/ - emit(SHL(dst, dst, fs_reg(32 - width))); - emit(ASR(dst, dst, fs_reg(32 - width))); + bld.SHL(dst, dst, fs_reg(32 - width)); + bld.ASR(dst, dst, fs_reg(32 - width)); } dst = offset(dst, 1); @@ -1054,12 +1051,12 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components, l = offset(l, i); if (swiz == SWIZZLE_ZERO) { - emit(MOV(l, fs_reg(0.0f))); + bld.MOV(l, fs_reg(0.0f)); } else if (swiz == SWIZZLE_ONE) { - emit(MOV(l, fs_reg(1.0f))); + bld.MOV(l, fs_reg(1.0f)); } else { - emit(MOV(l, offset(orig_val, - GET_SWZ(key_tex->swizzles[sampler], i)))); + bld.MOV(l, offset(orig_val, + GET_SWZ(key_tex->swizzles[sampler], i))); } } this->result = swizzled_result; From 35e64f2a769c915bedeafdb86152b0c4a2067b35 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:06:13 +0300 Subject: [PATCH 502/834] i965/fs: Migrate untyped surface read and atomic to the IR builder. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 55 ++++++++++---------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 0b56579c344..69fad4e4923 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1164,17 +1164,16 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* Initialize the sample mask in the message header. 
*/ - emit(MOV(sources[0], fs_reg(0u))) - ->force_writemask_all = true; + bld.exec_all().MOV(sources[0], fs_reg(0u)); if (stage == MESA_SHADER_FRAGMENT) { if (((brw_wm_prog_data*)this->prog_data)->uses_kill) { - emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1))) - ->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), brw_flag_reg(0, 1)); } else { - emit(MOV(component(sources[0], 7), - retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) - ->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)); } } else { /* The execution mask is part of the side-band information sent together with @@ -1183,37 +1182,37 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, * the atomic operation. */ assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE); - emit(MOV(component(sources[0], 7), - fs_reg(0xffffu)))->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), fs_reg(0xffffu)); } length++; /* Set the atomic operation offset. */ sources[1] = vgrf(glsl_type::uint_type); - emit(MOV(sources[1], offset)); + bld.MOV(sources[1], offset); length++; /* Set the atomic operation arguments. */ if (src0.file != BAD_FILE) { sources[length] = vgrf(glsl_type::uint_type); - emit(MOV(sources[length], src0)); + bld.MOV(sources[length], src0); length++; } if (src1.file != BAD_FILE) { sources[length] = vgrf(glsl_type::uint_type); - emit(MOV(sources[length], src1)); + bld.MOV(sources[length], src1); length++; } int mlen = 1 + (length - 1) * reg_width; fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_UD, dispatch_width); - emit(LOAD_PAYLOAD(src_payload, sources, length, 1)); + bld.LOAD_PAYLOAD(src_payload, sources, length, 1); /* Emit the instruction. 
*/ - fs_inst *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload, - fs_reg(surf_index), fs_reg(atomic_op)); + fs_inst *inst = bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload, + fs_reg(surf_index), fs_reg(atomic_op)); inst->mlen = mlen; } @@ -1227,17 +1226,17 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* Initialize the sample mask in the message header. */ - emit(MOV(sources[0], fs_reg(0u))) - ->force_writemask_all = true; + bld.exec_all() + .MOV(sources[0], fs_reg(0u)); if (stage == MESA_SHADER_FRAGMENT) { if (((brw_wm_prog_data*)this->prog_data)->uses_kill) { - emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1))) - ->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), brw_flag_reg(0, 1)); } else { - emit(MOV(component(sources[0], 7), - retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) - ->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)); } } else { /* The execution mask is part of the side-band information sent together with @@ -1246,22 +1245,22 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, * the atomic operation. */ assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE); - emit(MOV(component(sources[0], 7), - fs_reg(0xffffu)))->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), fs_reg(0xffffu)); } /* Set the surface read offset. */ sources[1] = vgrf(glsl_type::uint_type); - emit(MOV(sources[1], offset)); + bld.MOV(sources[1], offset); int mlen = 1 + reg_width; fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_UD, dispatch_width); - fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2, 1)); + fs_inst *inst = bld.LOAD_PAYLOAD(src_payload, sources, 2, 1); /* Emit the instruction. 
*/ - inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload, - fs_reg(surf_index), fs_reg(1)); + inst = bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload, + fs_reg(surf_index), fs_reg(1)); inst->mlen = mlen; } From d3c10ad42729c1fe74a7f7c67465bd2beb7f9e75 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 20:43:09 +0300 Subject: [PATCH 503/834] i965/fs: Migrate shader time to the IR builder. v2: Change null register destination type to UD so it can be compacted. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 54 +++++++++++----------------- src/mesa/drivers/dri/i965/brw_fs.h | 5 +-- 2 files changed, 23 insertions(+), 36 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index c4d3f575de3..bbf3bc27f1a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -675,7 +675,7 @@ fs_visitor::type_size(const struct glsl_type *type) * the destination of the MOV, with extra parameters set. */ fs_reg -fs_visitor::get_timestamp(fs_inst **out_mov) +fs_visitor::get_timestamp(const fs_builder &bld) { assert(devinfo->gen >= 7); @@ -686,11 +686,10 @@ fs_visitor::get_timestamp(fs_inst **out_mov) fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4); - fs_inst *mov = MOV(dst, ts); /* We want to read the 3 fields we care about even if it's not enabled in * the dispatch. */ - mov->force_writemask_all = true; + bld.exec_all().MOV(dst, ts); /* The caller wants the low 32 bits of the timestamp. 
Since it's running * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds, @@ -704,24 +703,18 @@ fs_visitor::get_timestamp(fs_inst **out_mov) */ dst.set_smear(0); - *out_mov = mov; return dst; } void fs_visitor::emit_shader_time_begin() { - current_annotation = "shader time start"; - fs_inst *mov; - shader_start_time = get_timestamp(&mov); - emit(mov); + shader_start_time = get_timestamp(bld.annotate("shader time start")); } void fs_visitor::emit_shader_time_end() { - current_annotation = "shader time end"; - enum shader_time_shader_type type, written_type, reset_type; switch (stage) { case MESA_SHADER_VERTEX: @@ -758,47 +751,41 @@ fs_visitor::emit_shader_time_end() /* Insert our code just before the final SEND with EOT. */ exec_node *end = this->instructions.get_tail(); assert(end && ((fs_inst *) end)->eot); + const fs_builder ibld = bld.annotate("shader time end") + .exec_all().at(NULL, end); - fs_inst *tm_read; - fs_reg shader_end_time = get_timestamp(&tm_read); - end->insert_before(tm_read); + fs_reg shader_end_time = get_timestamp(ibld); /* Check that there weren't any timestamp reset events (assuming these * were the only two timestamp reads that happened). */ fs_reg reset = shader_end_time; reset.set_smear(2); - fs_inst *test = AND(reg_null_d, reset, fs_reg(1u)); - test->conditional_mod = BRW_CONDITIONAL_Z; - test->force_writemask_all = true; - end->insert_before(test); - end->insert_before(IF(BRW_PREDICATE_NORMAL)); + set_condmod(BRW_CONDITIONAL_Z, + ibld.AND(ibld.null_reg_ud(), reset, fs_reg(1u))); + ibld.IF(BRW_PREDICATE_NORMAL); fs_reg start = shader_start_time; start.negate = true; fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1); diff.set_smear(0); - fs_inst *add = ADD(diff, start, shader_end_time); - add->force_writemask_all = true; - end->insert_before(add); + ibld.ADD(diff, start, shader_end_time); /* If there were no instructions between the two timestamp gets, the diff * is 2 cycles. 
Remove that overhead, so I can forget about that when * trying to determine the time taken for single instructions. */ - add = ADD(diff, diff, fs_reg(-2u)); - add->force_writemask_all = true; - end->insert_before(add); - - end->insert_before(SHADER_TIME_ADD(type, diff)); - end->insert_before(SHADER_TIME_ADD(written_type, fs_reg(1u))); - end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ELSE, dispatch_width)); - end->insert_before(SHADER_TIME_ADD(reset_type, fs_reg(1u))); - end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ENDIF, dispatch_width)); + ibld.ADD(diff, diff, fs_reg(-2u)); + SHADER_TIME_ADD(ibld, type, diff); + SHADER_TIME_ADD(ibld, written_type, fs_reg(1u)); + ibld.emit(BRW_OPCODE_ELSE); + SHADER_TIME_ADD(ibld, reset_type, fs_reg(1u)); + ibld.emit(BRW_OPCODE_ENDIF); } -fs_inst * -fs_visitor::SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value) +void +fs_visitor::SHADER_TIME_ADD(const fs_builder &bld, + enum shader_time_shader_type type, fs_reg value) { int shader_time_index = brw_get_shader_time_index(brw, shader_prog, prog, type); @@ -810,8 +797,7 @@ fs_visitor::SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value) else payload = vgrf(glsl_type::uint_type); - return new(mem_ctx) fs_inst(SHADER_OPCODE_SHADER_TIME_ADD, - fs_reg(), payload, offset, value); + bld.emit(SHADER_OPCODE_SHADER_TIME_ADD, fs_reg(), payload, offset, value); } void diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index cf568368541..de372982517 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -339,7 +339,8 @@ public: void emit_shader_time_begin(); void emit_shader_time_end(); - fs_inst *SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value); + void SHADER_TIME_ADD(const brw::fs_builder &bld, + enum shader_time_shader_type type, fs_reg value); void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, fs_reg dst, fs_reg offset, fs_reg src0, @@ -350,7 +351,7 @@ 
public: void resolve_ud_negate(fs_reg *reg); - fs_reg get_timestamp(fs_inst **out_mov); + fs_reg get_timestamp(const brw::fs_builder &bld); struct brw_reg interp_reg(int location, int channel); int implied_mrf_writes(fs_inst *inst); From 31477226ec6cbe956a4bbdcae81cc7ca5ad28cc6 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:54:54 +0300 Subject: [PATCH 504/834] i965/fs: Migrate FS interpolation code to the IR builder. v2: Fix some preexisting trivial codestyle issues. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 28 ++++----- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 66 ++++++++++---------- 2 files changed, 46 insertions(+), 48 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index bbf3bc27f1a..d023fa9adab 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1130,15 +1130,15 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, /* gl_FragCoord.x */ if (pixel_center_integer) { - emit(MOV(wpos, this->pixel_x)); + bld.MOV(wpos, this->pixel_x); } else { - emit(ADD(wpos, this->pixel_x, fs_reg(0.5f))); + bld.ADD(wpos, this->pixel_x, fs_reg(0.5f)); } wpos = offset(wpos, 1); /* gl_FragCoord.y */ if (!flip && pixel_center_integer) { - emit(MOV(wpos, this->pixel_y)); + bld.MOV(wpos, this->pixel_y); } else { fs_reg pixel_y = this->pixel_y; float offset = (pixel_center_integer ? 
0.0 : 0.5); @@ -1148,22 +1148,22 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, offset += key->drawable_height - 1.0; } - emit(ADD(wpos, pixel_y, fs_reg(offset))); + bld.ADD(wpos, pixel_y, fs_reg(offset)); } wpos = offset(wpos, 1); /* gl_FragCoord.z */ if (devinfo->gen >= 6) { - emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)))); + bld.MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))); } else { - emit(FS_OPCODE_LINTERP, wpos, + bld.emit(FS_OPCODE_LINTERP, wpos, this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], interp_reg(VARYING_SLOT_POS, 2)); } wpos = offset(wpos, 1); /* gl_FragCoord.w: Already set up in emit_interpolation */ - emit(BRW_OPCODE_MOV, wpos, this->wpos_w); + bld.MOV(wpos, this->wpos_w); return reg; } @@ -1198,8 +1198,8 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp, */ barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; } - return emit(FS_OPCODE_LINTERP, attr, - this->delta_xy[barycoord_mode], interp); + return bld.emit(FS_OPCODE_LINTERP, attr, + this->delta_xy[barycoord_mode], interp); } void @@ -1257,7 +1257,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, struct brw_reg interp = interp_reg(location, k); interp = suboffset(interp, 3); interp.type = attr.type; - emit(FS_OPCODE_CINTERP, attr, fs_reg(interp)); + bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp)); attr = offset(attr, 1); } } else { @@ -1270,7 +1270,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, * unlit, replace the centroid data with non-centroid * data. 
*/ - emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); + bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); fs_inst *inst; inst = emit_linterp(attr, fs_reg(interp), interpolation_mode, @@ -1294,7 +1294,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, mod_sample || key->persample_shading); } if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) { - emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w); + bld.MUL(attr, attr, this->pixel_w); } attr = offset(attr, 1); } @@ -1325,7 +1325,7 @@ fs_visitor::emit_frontfacing_interpolation() fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W)); g0.negate = true; - emit(ASR(*reg, g0, fs_reg(15))); + bld.ASR(*reg, g0, fs_reg(15)); } else { /* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create * a boolean result from this (1/true or 0/false). @@ -1340,7 +1340,7 @@ fs_visitor::emit_frontfacing_interpolation() fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D)); g1_6.negate = true; - emit(ASR(*reg, g1_6, fs_reg(31))); + bld.ASR(*reg, g1_6, fs_reg(31)); } return reg; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 69fad4e4923..9652e6ba8dd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1354,19 +1354,19 @@ fs_visitor::emit_interpolation_setup_gen4() { struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - this->current_annotation = "compute pixel centers"; + fs_builder abld = bld.annotate("compute pixel centers"); this->pixel_x = vgrf(glsl_type::uint_type); this->pixel_y = vgrf(glsl_type::uint_type); this->pixel_x.type = BRW_REGISTER_TYPE_UW; this->pixel_y.type = BRW_REGISTER_TYPE_UW; - emit(ADD(this->pixel_x, + abld.ADD(this->pixel_x, fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010)))); - emit(ADD(this->pixel_y, + fs_reg(brw_imm_v(0x10101010))); + abld.ADD(this->pixel_y, 
fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100)))); + fs_reg(brw_imm_v(0x11001100))); - this->current_annotation = "compute pixel deltas from v0"; + abld = bld.annotate("compute pixel deltas from v0"); this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = vgrf(glsl_type::vec2_type); @@ -1375,27 +1375,27 @@ fs_visitor::emit_interpolation_setup_gen4() const fs_reg ystart(negate(brw_vec1_grf(1, 1))); if (devinfo->has_pln && dispatch_width == 16) { - emit(ADD(half(offset(delta_xy, 0), 0), half(this->pixel_x, 0), xstart)); - emit(ADD(half(offset(delta_xy, 0), 1), half(this->pixel_y, 0), ystart)); - emit(ADD(half(offset(delta_xy, 1), 0), half(this->pixel_x, 1), xstart)) - ->force_sechalf = true; - emit(ADD(half(offset(delta_xy, 1), 1), half(this->pixel_y, 1), ystart)) - ->force_sechalf = true; + for (unsigned i = 0; i < 2; i++) { + abld.half(i).ADD(half(offset(delta_xy, i), 0), + half(this->pixel_x, i), xstart); + abld.half(i).ADD(half(offset(delta_xy, i), 1), + half(this->pixel_y, i), ystart); + } } else { - emit(ADD(offset(delta_xy, 0), this->pixel_x, xstart)); - emit(ADD(offset(delta_xy, 1), this->pixel_y, ystart)); + abld.ADD(offset(delta_xy, 0), this->pixel_x, xstart); + abld.ADD(offset(delta_xy, 1), this->pixel_y, ystart); } - this->current_annotation = "compute pos.w and 1/pos.w"; + abld = bld.annotate("compute pos.w and 1/pos.w"); /* Compute wpos.w. It's always in our setup, since it's needed to * interpolate the other attributes. */ this->wpos_w = vgrf(glsl_type::float_type); - emit(FS_OPCODE_LINTERP, wpos_w, delta_xy, interp_reg(VARYING_SLOT_POS, 3)); + abld.emit(FS_OPCODE_LINTERP, wpos_w, delta_xy, + interp_reg(VARYING_SLOT_POS, 3)); /* Compute the pixel 1/W value from wpos.w. */ this->pixel_w = vgrf(glsl_type::float_type); - emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); - this->current_annotation = NULL; + abld.emit(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); } /** Emits the interpolation for the varying inputs. 
*/ @@ -1404,7 +1404,7 @@ fs_visitor::emit_interpolation_setup_gen6() { struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - this->current_annotation = "compute pixel centers"; + fs_builder abld = bld.annotate("compute pixel centers"); if (brw->gen >= 8 || dispatch_width == 8) { /* The "Register Region Restrictions" page says for BDW (and newer, * presumably): @@ -1418,15 +1418,15 @@ fs_visitor::emit_interpolation_setup_gen6() */ fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8), BRW_REGISTER_TYPE_UW, dispatch_width * 2); - emit(ADD(int_pixel_xy, + abld.exec_all() + .ADD(int_pixel_xy, fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)), - fs_reg(brw_imm_v(0x11001010)))) - ->force_writemask_all = true; + fs_reg(brw_imm_v(0x11001010))); this->pixel_x = vgrf(glsl_type::float_type); this->pixel_y = vgrf(glsl_type::float_type); - emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy); - emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy); + abld.emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy); + abld.emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy); } else { /* The "Register Region Restrictions" page says for SNB, IVB, HSW: * @@ -1440,12 +1440,12 @@ fs_visitor::emit_interpolation_setup_gen6() fs_reg int_pixel_y = vgrf(glsl_type::uint_type); int_pixel_x.type = BRW_REGISTER_TYPE_UW; int_pixel_y.type = BRW_REGISTER_TYPE_UW; - emit(ADD(int_pixel_x, + abld.ADD(int_pixel_x, fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010)))); - emit(ADD(int_pixel_y, + fs_reg(brw_imm_v(0x10101010))); + abld.ADD(int_pixel_y, fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100)))); + fs_reg(brw_imm_v(0x11001100))); /* As of gen6, we can no longer mix float and int sources. 
We have * to turn the integer pixel centers into floats for their actual @@ -1453,21 +1453,19 @@ fs_visitor::emit_interpolation_setup_gen6() */ this->pixel_x = vgrf(glsl_type::float_type); this->pixel_y = vgrf(glsl_type::float_type); - emit(MOV(this->pixel_x, int_pixel_x)); - emit(MOV(this->pixel_y, int_pixel_y)); + abld.MOV(this->pixel_x, int_pixel_x); + abld.MOV(this->pixel_y, int_pixel_y); } - this->current_annotation = "compute pos.w"; + abld = bld.annotate("compute pos.w"); this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0)); this->wpos_w = vgrf(glsl_type::float_type); - emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); + abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) { uint8_t reg = payload.barycentric_coord_reg[i]; this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0)); } - - this->current_annotation = NULL; } void From 46f264638ad97a0b806e6fad7117d62a2cf914b6 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:56:20 +0300 Subject: [PATCH 505/834] i965/fs: Migrate FS gl_SamplePosition/ID computation code to the IR builder. v2: Use fs_builder::AND/SHR/MOV instead of ::emit. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 49 ++++++++++++++-------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index d023fa9adab..7d4564deac7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1355,9 +1355,9 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) if (key->compute_pos_offset) { /* Convert int_sample_pos to floating point */ - emit(MOV(dst, int_sample_pos)); + bld.MOV(dst, int_sample_pos); /* Scale to the range [0, 1] */ - emit(MUL(dst, dst, fs_reg(1 / 16.0f))); + bld.MUL(dst, dst, fs_reg(1 / 16.0f)); } else { /* From ARB_sample_shading specification: @@ -1365,7 +1365,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) * rasterization is disabled, gl_SamplePosition will always be * (0.5, 0.5). */ - emit(MOV(dst, fs_reg(0.5f))); + bld.MOV(dst, fs_reg(0.5f)); } } @@ -1374,7 +1374,7 @@ fs_visitor::emit_samplepos_setup() { assert(devinfo->gen >= 6); - this->current_annotation = "compute sample position"; + const fs_builder abld = bld.annotate("compute sample position"); fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::vec2_type)); fs_reg pos = *reg; fs_reg int_sample_x = vgrf(glsl_type::int_type); @@ -1396,22 +1396,22 @@ fs_visitor::emit_samplepos_setup() BRW_REGISTER_TYPE_B), 16, 8, 2); if (dispatch_width == 8) { - emit(MOV(int_sample_x, fs_reg(sample_pos_reg))); + abld.MOV(int_sample_x, fs_reg(sample_pos_reg)); } else { - emit(MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg))); - emit(MOV(half(int_sample_x, 1), fs_reg(suboffset(sample_pos_reg, 16)))) - ->force_sechalf = true; + abld.half(0).MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg)); + abld.half(1).MOV(half(int_sample_x, 1), + fs_reg(suboffset(sample_pos_reg, 16))); } /* Compute gl_SamplePosition.x */ compute_sample_position(pos, int_sample_x); pos = offset(pos, 1); if 
(dispatch_width == 8) { - emit(MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1)))); + abld.MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1))); } else { - emit(MOV(half(int_sample_y, 0), - fs_reg(suboffset(sample_pos_reg, 1)))); - emit(MOV(half(int_sample_y, 1), fs_reg(suboffset(sample_pos_reg, 17)))) - ->force_sechalf = true; + abld.half(0).MOV(half(int_sample_y, 0), + fs_reg(suboffset(sample_pos_reg, 1))); + abld.half(1).MOV(half(int_sample_y, 1), + fs_reg(suboffset(sample_pos_reg, 17))); } /* Compute gl_SamplePosition.y */ compute_sample_position(pos, int_sample_y); @@ -1425,7 +1425,7 @@ fs_visitor::emit_sampleid_setup() brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; assert(devinfo->gen >= 6); - this->current_annotation = "compute sample id"; + const fs_builder abld = bld.annotate("compute sample id"); fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type)); if (key->compute_sample_id) { @@ -1452,26 +1452,25 @@ fs_visitor::emit_sampleid_setup() * are sample 1 of subspan 0; the third group is sample 0 of * subspan 1, and finally sample 1 of subspan 1. */ - fs_inst *inst; - inst = emit(BRW_OPCODE_AND, t1, - fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)), - fs_reg(0xc0)); - inst->force_writemask_all = true; - inst = emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5)); - inst->force_writemask_all = true; + abld.exec_all() + .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)), + fs_reg(0xc0)); + abld.exec_all().SHR(t1, t1, fs_reg(5)); + /* This works for both SIMD8 and SIMD16 */ - inst = emit(MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210))); - inst->force_writemask_all = true; + abld.exec_all() + .MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210)); + /* This special instruction takes care of setting vstride=1, * width=4, hstride=0 of t2 during an ADD instruction. 
*/ - emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2); + abld.emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2); } else { /* As per GL_ARB_sample_shading specification: * "When rendering to a non-multisample buffer, or if multisample * rasterization is disabled, gl_SampleID will always be zero." */ - emit(BRW_OPCODE_MOV, *reg, fs_reg(0)); + abld.MOV(*reg, fs_reg(0)); } return reg; From ad68853f17868081a69b3f73f4bf4c1bc8b2571d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 20:45:54 +0300 Subject: [PATCH 506/834] i965/fs: Migrate FS discard handling to the IR builder. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7d4564deac7..0b502172d86 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1595,7 +1595,7 @@ fs_visitor::emit_discard_jump() /* For performance, after a discard, jump to the end of the * shader if all relevant channels have been discarded. */ - fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP); + fs_inst *discard_jump = bld.emit(FS_OPCODE_DISCARD_JUMP); discard_jump->flag_subreg = 1; discard_jump->predicate = (dispatch_width == 8) @@ -4185,7 +4185,7 @@ fs_visitor::run_fs() * Initialize it with the dispatched pixels. */ if (wm_prog_data->uses_kill) { - fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); + fs_inst *discard_init = bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); discard_init->flag_subreg = 1; } @@ -4198,7 +4198,7 @@ fs_visitor::run_fs() return false; if (wm_prog_data->uses_kill) - emit(FS_OPCODE_PLACEHOLDER_HALT); + bld.emit(FS_OPCODE_PLACEHOLDER_HALT); if (wm_key->alpha_test_func) emit_alpha_test(); From 840cbef416b47fa1a92d6491cdd2895442f063bc Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:07:34 +0300 Subject: [PATCH 507/834] i965/fs: Migrate FS alpha test to the IR builder. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 9652e6ba8dd..34679cf74cd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1525,7 +1525,7 @@ fs_visitor::emit_alpha_test() { assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - this->current_annotation = "Alpha test"; + const fs_builder abld = bld.annotate("Alpha test"); fs_inst *cmp; if (key->alpha_test_func == GL_ALWAYS) @@ -1535,15 +1535,15 @@ fs_visitor::emit_alpha_test() /* f0.1 = 0 */ fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - cmp = emit(CMP(reg_null_f, some_reg, some_reg, - BRW_CONDITIONAL_NEQ)); + cmp = abld.CMP(bld.null_reg_f(), some_reg, some_reg, + BRW_CONDITIONAL_NEQ); } else { /* RT0 alpha */ fs_reg color = offset(outputs[0], 3); /* f0.1 &= func(color, ref) */ - cmp = emit(CMP(reg_null_f, color, fs_reg(key->alpha_test_ref), - cond_for_alpha_func(key->alpha_test_func))); + cmp = abld.CMP(bld.null_reg_f(), color, fs_reg(key->alpha_test_ref), + cond_for_alpha_func(key->alpha_test_func)); } cmp->predicate = BRW_PREDICATE_NORMAL; cmp->flag_subreg = 1; From e32c16c47f7a3cf25e2b4d2f3b97d0f8f89669c0 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:07:52 +0300 Subject: [PATCH 508/834] i965/fs: Migrate FS framebuffer writes to the IR builder. The explicit call to fs_builder::group() in emit_single_fb_write() is required by the builder (otherwise the assertion in fs_builder::emit() would fail) because the subsequent LOAD_PAYLOAD and FB_WRITE instructions are in some cases emitted with a non-native execution width. 
The previous code would always use the channel enables for the first quarter, which is dubious but probably worked in practice because FB writes are never emitted inside non-uniform control flow and we don't pass the kill-pixel mask via predication in the cases where we have to fall-back to SIMD8 writes. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 9 ++-- src/mesa/drivers/dri/i965/brw_fs.h | 3 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 57 ++++++++++---------- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 0b502172d86..46ec33dba7d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2918,13 +2918,12 @@ fs_visitor::emit_repclear_shader() int base_mrf = 1; int color_mrf = base_mrf + 2; - fs_inst *mov = emit(MOV(vec4(brw_message_reg(color_mrf)), - fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F))); - mov->force_writemask_all = true; + fs_inst *mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)), + fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); fs_inst *write; if (key->nr_color_regions == 1) { - write = emit(FS_OPCODE_REP_FB_WRITE); + write = bld.emit(FS_OPCODE_REP_FB_WRITE); write->saturate = key->clamp_fragment_color; write->base_mrf = color_mrf; write->target = 0; @@ -2933,7 +2932,7 @@ fs_visitor::emit_repclear_shader() } else { assume(key->nr_color_regions > 0); for (int i = 0; i < key->nr_color_regions; ++i) { - write = emit(FS_OPCODE_REP_FB_WRITE); + write = bld.emit(FS_OPCODE_REP_FB_WRITE); write->saturate = key->clamp_fragment_color; write->base_mrf = base_mrf; write->target = i; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index de372982517..571d411f109 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -330,7 +330,8 @@ public: void setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, unsigned exec_size, 
bool use_2nd_half); void emit_alpha_test(); - fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2, + fs_inst *emit_single_fb_write(const brw::fs_builder &bld, + fs_reg color1, fs_reg color2, fs_reg src0_alpha, unsigned components, unsigned exec_size, bool use_2nd_half = false); void emit_fb_writes(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 34679cf74cd..c5aec177163 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1296,12 +1296,12 @@ fs_visitor::emit_dummy_fs() /* Everyone's favorite color. */ const float color[4] = { 1.0, 0.0, 1.0, 0.0 }; for (int i = 0; i < 4; i++) { - emit(MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F, - dispatch_width), fs_reg(color[i]))); + bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F, + dispatch_width), fs_reg(color[i])); } fs_inst *write; - write = emit(FS_OPCODE_FB_WRITE); + write = bld.emit(FS_OPCODE_FB_WRITE); write->eot = true; if (devinfo->gen >= 6) { write->base_mrf = 2; @@ -1479,7 +1479,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, fs_reg tmp = vgrf(glsl_type::vec4_type); assert(color.type == BRW_REGISTER_TYPE_F); for (unsigned i = 0; i < components; i++) { - inst = emit(MOV(offset(tmp, i), offset(color, i))); + inst = bld.MOV(offset(tmp, i), offset(color, i)); inst->saturate = true; } color = tmp; @@ -1550,15 +1550,14 @@ fs_visitor::emit_alpha_test() } fs_inst * -fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, +fs_visitor::emit_single_fb_write(const fs_builder &bld, + fs_reg color0, fs_reg color1, fs_reg src0_alpha, unsigned components, unsigned exec_size, bool use_2nd_half) { assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - - this->current_annotation = "FB write header"; int header_size = 2, 
payload_header_size; /* We can potentially have a message length of up to 15, so we have to set @@ -1589,22 +1588,23 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (payload.aa_dest_stencil_reg) { sources[length] = fs_reg(GRF, alloc.allocate(1)); - emit(MOV(sources[length], - fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)))); + bld.exec_all().annotate("FB write stencil/AA alpha") + .MOV(sources[length], + fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))); length++; } prog_data->uses_omask = prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); if (prog_data->uses_omask) { - this->current_annotation = "FB write oMask"; assert(this->sample_mask.file != BAD_FILE); /* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since * it's unsinged single words, one vgrf is always 16-wide. */ sources[length] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UW, 16); - emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); + bld.exec_all().annotate("FB write oMask") + .emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); length++; } @@ -1665,20 +1665,21 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (payload.dest_depth_reg) sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)); + const fs_builder ubld = bld.group(exec_size, use_2nd_half); fs_inst *load; fs_inst *write; if (devinfo->gen >= 7) { /* Send from the GRF */ fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F, exec_size); - load = emit(LOAD_PAYLOAD(payload, sources, length, payload_header_size)); + load = ubld.LOAD_PAYLOAD(payload, sources, length, payload_header_size); payload.reg = alloc.allocate(load->regs_written); load->dst = payload; - write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload); + write = ubld.emit(FS_OPCODE_FB_WRITE, reg_undef, payload); write->base_mrf = -1; } else { /* Send from the MRF */ - load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size), - sources, length, 
payload_header_size)); + load = ubld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size), + sources, length, payload_header_size); /* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD * will do this for us if we just give it a COMPR4 destination. @@ -1686,7 +1687,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (brw->gen < 6 && exec_size == 16) load->dst.reg |= BRW_MRF_COMPR4; - write = emit(FS_OPCODE_FB_WRITE); + write = ubld.emit(FS_OPCODE_FB_WRITE); write->exec_size = exec_size; write->base_mrf = 1; } @@ -1709,10 +1710,10 @@ fs_visitor::emit_fb_writes() fs_inst *inst = NULL; if (do_dual_src) { - this->current_annotation = ralloc_asprintf(this->mem_ctx, - "FB dual-source write"); - inst = emit_single_fb_write(this->outputs[0], this->dual_src_output, - reg_undef, 4, 8); + const fs_builder abld = bld.annotate("FB dual-source write"); + + inst = emit_single_fb_write(abld, this->outputs[0], + this->dual_src_output, reg_undef, 4, 8); inst->target = 0; /* SIMD16 dual source blending requires to send two SIMD8 dual source @@ -1733,8 +1734,9 @@ fs_visitor::emit_fb_writes() * m + 3: a1 */ if (dispatch_width == 16) { - inst = emit_single_fb_write(this->outputs[0], this->dual_src_output, - reg_undef, 4, 8, true); + inst = emit_single_fb_write(abld, this->outputs[0], + this->dual_src_output, reg_undef, 4, 8, + true); inst->target = 0; } @@ -1745,14 +1747,14 @@ fs_visitor::emit_fb_writes() if (this->outputs[target].file == BAD_FILE) continue; - this->current_annotation = ralloc_asprintf(this->mem_ctx, - "FB write target %d", - target); + const fs_builder abld = bld.annotate( + ralloc_asprintf(this->mem_ctx, "FB write target %d", target)); + fs_reg src0_alpha; if (devinfo->gen >= 6 && key->replicate_alpha && target != 0) src0_alpha = offset(outputs[0], 3); - inst = emit_single_fb_write(this->outputs[target], reg_undef, + inst = emit_single_fb_write(abld, this->outputs[target], reg_undef, src0_alpha, this->output_components[target], 
dispatch_width); @@ -1765,13 +1767,12 @@ fs_visitor::emit_fb_writes() * alpha out the pipeline to our null renderbuffer to support * alpha-testing, alpha-to-coverage, and so on. */ - inst = emit_single_fb_write(reg_undef, reg_undef, reg_undef, 0, + inst = emit_single_fb_write(bld, reg_undef, reg_undef, reg_undef, 0, dispatch_width); inst->target = 0; } inst->eot = true; - this->current_annotation = NULL; } void From e522f12f03bcb0edb1384adff894918bf8d6d1b6 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 22:43:00 +0300 Subject: [PATCH 509/834] i965/fs: Migrate VS output writes to the IR builder. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 24 +++++++++----------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index c5aec177163..63747a1bc0d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1823,7 +1823,7 @@ void fs_visitor::compute_clip_distance() setup_uniform_clipplane_values(); - current_annotation = "user clip distances"; + const fs_builder abld = bld.annotate("user clip distances"); this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type); this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type); @@ -1833,10 +1833,10 @@ void fs_visitor::compute_clip_distance() fs_reg output = outputs[VARYING_SLOT_CLIP_DIST0 + i / 4]; output.reg_offset = i & 3; - emit(MUL(output, outputs[clip_vertex], u)); + abld.MUL(output, outputs[clip_vertex], u); for (int j = 1; j < 4; j++) { u.reg = userplane[i].reg + j; - emit(MAD(output, output, offset(outputs[clip_vertex], j), u)); + abld.MAD(output, output, offset(outputs[clip_vertex], j), u); } } } @@ -1864,11 +1864,10 @@ fs_visitor::emit_urb_writes() if (vue_map->slots_valid == 0) { fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - fs_inst *inst = emit(MOV(payload, 
fs_reg(retype(brw_vec8_grf(1, 0), - BRW_REGISTER_TYPE_UD)))); - inst->force_writemask_all = true; + bld.exec_all().MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0), + BRW_REGISTER_TYPE_UD))); - inst = emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); + fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); inst->eot = true; inst->mlen = 1; inst->offset = 1; @@ -1897,7 +1896,7 @@ fs_visitor::emit_urb_writes() } zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - emit(MOV(zero, fs_reg(0u))); + bld.MOV(zero, fs_reg(0u)); sources[length++] = zero; if (vue_map->slots_valid & VARYING_BIT_LAYER) @@ -1952,8 +1951,7 @@ fs_visitor::emit_urb_writes() for (int i = 0; i < 4; i++) { reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type); src = offset(this->outputs[varying], i); - fs_inst *inst = emit(MOV(reg, src)); - inst->saturate = true; + set_saturate(true, bld.MOV(reg, src)); sources[length++] = reg; } } else { @@ -1963,7 +1961,7 @@ fs_visitor::emit_urb_writes() break; } - current_annotation = "URB write"; + const fs_builder abld = bld.annotate("URB write"); /* If we've queued up 8 registers of payload (2 VUE slots), if this is * the last slot or if we need to flush (see BAD_FILE varying case @@ -1980,10 +1978,10 @@ fs_visitor::emit_urb_writes() fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); memcpy(&payload_sources[1], sources, length * sizeof sources[0]); - emit(LOAD_PAYLOAD(payload, payload_sources, length + 1, 1)); + abld.LOAD_PAYLOAD(payload, payload_sources, length + 1, 1); fs_inst *inst = - emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); + abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); inst->eot = last; inst->mlen = length + 1; inst->offset = urb_offset; From d5cb2e513794f6c26259665bc93cf507e86ae3b8 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:08:43 +0300 Subject: [PATCH 510/834] i965/fs: Migrate CS terminate message to the IR builder. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 63747a1bc0d..afd7a5ee4dd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -2018,11 +2018,10 @@ fs_visitor::emit_cs_terminate() */ struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD); fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - fs_inst *inst = emit(MOV(payload, g0)); - inst->force_writemask_all = true; + bld.exec_all().MOV(payload, g0); /* Send a message to the thread spawner to terminate the thread. */ - inst = emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload); + fs_inst *inst = bld.emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload); inst->eot = true; } From 09733f220ac9921ce7d8c3524bc5327d8203c446 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:12:49 +0300 Subject: [PATCH 511/834] i965/fs: Migrate NIR emit_percomp() to the IR builder. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.h | 3 ++- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 ++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 571d411f109..3f775d32347 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -322,7 +322,8 @@ public: void nir_emit_jump(nir_jump_instr *instr); fs_reg get_nir_src(nir_src src); fs_reg get_nir_dest(nir_dest dest); - void emit_percomp(fs_inst *inst, unsigned wr_mask); + void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, + unsigned wr_mask); bool optimize_frontfacing_ternary(nir_alu_instr *instr, const fs_reg &result); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 133c99ce992..7e33d7271c5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -28,6 +28,8 @@ #include "brw_fs.h" #include "brw_nir.h" +using namespace brw; + void fs_visitor::emit_nir_code() { @@ -107,7 +109,7 @@ fs_visitor::nir_setup_inputs(nir_shader *shader) if (var->data.location == VARYING_SLOT_POS) { reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer, var->data.origin_upper_left); - emit_percomp(MOV(input, reg), 0xF); + emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, input, reg), 0xF); } else { emit_general_interpolation(input, var->name, var->type, (glsl_interp_qualifier) var->data.interpolation, @@ -1201,19 +1203,20 @@ fs_visitor::get_nir_dest(nir_dest dest) } void -fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask) +fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst, + unsigned wr_mask) { for (unsigned i = 0; i < 4; i++) { if (!((wr_mask >> i) & 1)) continue; - fs_inst *new_inst = new(mem_ctx) fs_inst(*inst); + fs_inst *new_inst = new(mem_ctx) fs_inst(inst); new_inst->dst = offset(new_inst->dst, i); for (unsigned j = 0; j < 
new_inst->sources; j++) - if (inst->src[j].file == GRF) + if (new_inst->src[j].file == GRF) new_inst->src[j] = offset(new_inst->src[j], i); - emit(new_inst); + bld.emit(new_inst); } } @@ -1738,7 +1741,8 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr) fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; unsigned num_components = nir_tex_instr_dest_size(instr); - emit_percomp(MOV(dest, this->result), (1 << num_components) - 1); + emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, dest, this->result), + (1 << num_components) - 1); } void From 9976731485abb68eb3b5ae6f11a7838977b95b5b Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:17:36 +0300 Subject: [PATCH 512/834] i965/fs: Migrate NIR variable handling to the IR builder. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 7e33d7271c5..3f848300b08 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -40,12 +40,12 @@ fs_visitor::emit_nir_code() */ if (nir->num_inputs > 0) { - nir_inputs = vgrf(nir->num_inputs); + nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs); nir_setup_inputs(nir); } if (nir->num_outputs > 0) { - nir_outputs = vgrf(nir->num_outputs); + nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs); nir_setup_outputs(nir); } @@ -60,7 +60,7 @@ fs_visitor::emit_nir_code() unsigned array_elems = reg->num_array_elems == 0 ? 1 : reg->num_array_elems; unsigned size = array_elems * reg->num_components; - nir_globals[reg->index] = vgrf(size); + nir_globals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size); } /* get the main function and emit it */ @@ -95,8 +95,8 @@ fs_visitor::nir_setup_inputs(nir_shader *shader) unsigned array_length = var->type->is_array() ? 
var->type->length : 1; for (unsigned i = 0; i < array_length; i++) { for (unsigned j = 0; j < components; j++) { - emit(MOV(retype(offset(input, components * i + j), type), - offset(fs_reg(ATTR, var->data.location + i, type), j))); + bld.MOV(retype(offset(input, components * i + j), type), + offset(fs_reg(ATTR, var->data.location + i, type), j)); } } break; @@ -363,7 +363,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl) unsigned array_elems = reg->num_array_elems == 0 ? 1 : reg->num_array_elems; unsigned size = array_elems * reg->num_components; - nir_locals[reg->index] = vgrf(size); + nir_locals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size); } nir_emit_cf_list(&impl->body); @@ -1163,8 +1163,8 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg, int multiplier = nir_reg->num_components * (v->dispatch_width / 8); reg.reladdr = new(v->mem_ctx) fs_reg(v->vgrf(glsl_type::int_type)); - v->emit(v->MUL(*reg.reladdr, v->get_nir_src(*indirect), - fs_reg(multiplier))); + v->bld.MUL(*reg.reladdr, v->get_nir_src(*indirect), + fs_reg(multiplier)); } return reg; @@ -1176,11 +1176,10 @@ fs_visitor::get_nir_src(nir_src src) if (src.is_ssa) { assert(src.ssa->parent_instr->type == nir_instr_type_load_const); nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); - fs_reg reg = vgrf(src.ssa->num_components); - reg.type = BRW_REGISTER_TYPE_D; + fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, src.ssa->num_components); for (unsigned i = 0; i < src.ssa->num_components; ++i) - emit(MOV(offset(reg, i), fs_reg(load->value.i[i]))); + bld.MOV(offset(reg, i), fs_reg(load->value.i[i])); return reg; } else { From 3632c28bde071950dc57e42eb62a65fb838c8bdc Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 20:57:12 +0300 Subject: [PATCH 513/834] i965/fs: Migrate translation of NIR control flow to the IR builder. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.h | 3 ++- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 23 ++++++++++++----------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 3f775d32347..8803d5af8af 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -319,7 +319,8 @@ public: void nir_emit_alu(nir_alu_instr *instr); void nir_emit_intrinsic(nir_intrinsic_instr *instr); void nir_emit_texture(nir_tex_instr *instr); - void nir_emit_jump(nir_jump_instr *instr); + void nir_emit_jump(const brw::fs_builder &bld, + nir_jump_instr *instr); fs_reg get_nir_src(nir_src src); fs_reg get_nir_dest(nir_dest dest); void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 3f848300b08..f31829f8254 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -397,21 +397,21 @@ void fs_visitor::nir_emit_if(nir_if *if_stmt) { /* first, put the condition into f0 */ - fs_inst *inst = emit(MOV(reg_null_d, + fs_inst *inst = bld.MOV(bld.null_reg_d(), retype(get_nir_src(if_stmt->condition), - BRW_REGISTER_TYPE_D))); + BRW_REGISTER_TYPE_D)); inst->conditional_mod = BRW_CONDITIONAL_NZ; - emit(IF(BRW_PREDICATE_NORMAL)); + bld.IF(BRW_PREDICATE_NORMAL); nir_emit_cf_list(&if_stmt->then_list); /* note: if the else is empty, dead CF elimination will remove it */ - emit(BRW_OPCODE_ELSE); + bld.emit(BRW_OPCODE_ELSE); nir_emit_cf_list(&if_stmt->else_list); - emit(BRW_OPCODE_ENDIF); + bld.emit(BRW_OPCODE_ENDIF); if (!try_replace_with_sel() && devinfo->gen < 6) { no16("Can't support (non-uniform) control flow on SIMD16\n"); @@ -425,11 +425,11 @@ fs_visitor::nir_emit_loop(nir_loop *loop) no16("Can't support (non-uniform) control flow on SIMD16\n"); } - emit(BRW_OPCODE_DO); + bld.emit(BRW_OPCODE_DO); 
nir_emit_cf_list(&loop->body); - emit(BRW_OPCODE_WHILE); + bld.emit(BRW_OPCODE_WHILE); } void @@ -443,6 +443,7 @@ fs_visitor::nir_emit_block(nir_block *block) void fs_visitor::nir_emit_instr(nir_instr *instr) { + const fs_builder abld = bld.annotate(NULL, instr); this->base_ir = instr; switch (instr->type) { @@ -465,7 +466,7 @@ fs_visitor::nir_emit_instr(nir_instr *instr) break; case nir_instr_type_jump: - nir_emit_jump(nir_instr_as_jump(instr)); + nir_emit_jump(abld, nir_instr_as_jump(instr)); break; default: @@ -1745,14 +1746,14 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr) } void -fs_visitor::nir_emit_jump(nir_jump_instr *instr) +fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr) { switch (instr->type) { case nir_jump_break: - emit(BRW_OPCODE_BREAK); + bld.emit(BRW_OPCODE_BREAK); break; case nir_jump_continue: - emit(BRW_OPCODE_CONTINUE); + bld.emit(BRW_OPCODE_CONTINUE); break; case nir_jump_return: default: From fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6e Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 20:59:26 +0300 Subject: [PATCH 514/834] i965/fs: Migrate translation of NIR ALU instructions to the IR builder. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 4 +- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 192 +++++++++++------------ 3 files changed, 99 insertions(+), 99 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 46ec33dba7d..0647d8f8f97 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1482,8 +1482,8 @@ fs_visitor::resolve_source_modifiers(fs_reg *src) if (!src->abs && !src->negate) return; - fs_reg temp = retype(vgrf(1), src->type); - emit(MOV(temp, *src)); + fs_reg temp = bld.vgrf(src->type); + bld.MOV(temp, *src); *src = temp; } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 8803d5af8af..44a0d319acb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -316,7 +316,7 @@ public: void nir_emit_loop(nir_loop *loop); void nir_emit_block(nir_block *block); void nir_emit_instr(nir_instr *instr); - void nir_emit_alu(nir_alu_instr *instr); + void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr); void nir_emit_intrinsic(nir_intrinsic_instr *instr); void nir_emit_texture(nir_tex_instr *instr); void nir_emit_jump(const brw::fs_builder &bld, diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index f31829f8254..cd368809318 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -448,7 +448,7 @@ fs_visitor::nir_emit_instr(nir_instr *instr) switch (instr->type) { case nir_instr_type_alu: - nir_emit_alu(nir_instr_as_alu(instr)); + nir_emit_alu(abld, nir_instr_as_alu(instr)); break; case nir_instr_type_intrinsic: @@ -546,7 +546,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, tmp.subreg_offset = 2; tmp.stride = 2; - fs_inst *or_inst = emit(OR(tmp, g0, fs_reg(0x3f80))); + fs_inst *or_inst = bld.OR(tmp, g0, 
fs_reg(0x3f80)); or_inst->src[1].type = BRW_REGISTER_TYPE_UW; tmp.type = BRW_REGISTER_TYPE_D; @@ -571,15 +571,15 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, g1_6.negate = true; } - emit(OR(tmp, g1_6, fs_reg(0x3f800000))); + bld.OR(tmp, g1_6, fs_reg(0x3f800000)); } - emit(AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000))); + bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000)); return true; } void -fs_visitor::nir_emit_alu(nir_alu_instr *instr) +fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) { struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key; fs_inst *inst; @@ -611,7 +611,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) if (!instr->src[i].src.is_ssa && instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) { need_extra_copy = true; - temp = retype(vgrf(4), result.type); + temp = bld.vgrf(result.type, 4); break; } } @@ -621,11 +621,11 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) continue; if (instr->op == nir_op_imov || instr->op == nir_op_fmov) { - inst = emit(MOV(offset(temp, i), - offset(op[0], instr->src[0].swizzle[i]))); + inst = bld.MOV(offset(temp, i), + offset(op[0], instr->src[0].swizzle[i])); } else { - inst = emit(MOV(offset(temp, i), - offset(op[i], instr->src[i].swizzle[0]))); + inst = bld.MOV(offset(temp, i), + offset(op[i], instr->src[i].swizzle[0])); } inst->saturate = instr->dest.saturate; } @@ -639,7 +639,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) if (!(instr->dest.write_mask & (1 << i))) continue; - emit(MOV(offset(result, i), offset(temp, i))); + bld.MOV(offset(result, i), offset(temp, i)); } } return; @@ -671,13 +671,13 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) switch (instr->op) { case nir_op_i2f: case nir_op_u2f: - inst = emit(MOV(result, op[0])); + inst = bld.MOV(result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_f2i: case nir_op_f2u: - emit(MOV(result, op[0])); + bld.MOV(result, op[0]); break; case 
nir_op_fsign: { @@ -686,17 +686,17 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not * zero. */ - emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); + bld.CMP(bld.null_reg_f(), op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ); fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD); op[0].type = BRW_REGISTER_TYPE_UD; result.type = BRW_REGISTER_TYPE_UD; - emit(AND(result_int, op[0], fs_reg(0x80000000u))); + bld.AND(result_int, op[0], fs_reg(0x80000000u)); - inst = emit(OR(result_int, result_int, fs_reg(0x3f800000u))); + inst = bld.OR(result_int, result_int, fs_reg(0x3f800000u)); inst->predicate = BRW_PREDICATE_NORMAL; if (instr->dest.saturate) { - inst = emit(MOV(result, result)); + inst = bld.MOV(result, result); inst->saturate = true; } break; @@ -707,87 +707,87 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) * -> non-negative val generates 0x00000000. * Predicated OR sets 1 if val is positive. */ - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G)); - emit(ASR(result, op[0], fs_reg(31))); - inst = emit(OR(result, result, fs_reg(1))); + bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_G); + bld.ASR(result, op[0], fs_reg(31)); + inst = bld.OR(result, result, fs_reg(1)); inst->predicate = BRW_PREDICATE_NORMAL; break; case nir_op_frcp: - inst = emit_math(SHADER_OPCODE_RCP, result, op[0]); + inst = bld.emit(SHADER_OPCODE_RCP, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fexp2: - inst = emit_math(SHADER_OPCODE_EXP2, result, op[0]); + inst = bld.emit(SHADER_OPCODE_EXP2, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_flog2: - inst = emit_math(SHADER_OPCODE_LOG2, result, op[0]); + inst = bld.emit(SHADER_OPCODE_LOG2, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fsin: - inst = emit_math(SHADER_OPCODE_SIN, result, op[0]); + inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); inst->saturate = 
instr->dest.saturate; break; case nir_op_fcos: - inst = emit_math(SHADER_OPCODE_COS, result, op[0]); + inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fddx: if (fs_key->high_quality_derivatives) { - inst = emit(FS_OPCODE_DDX_FINE, result, op[0]); + inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]); } else { - inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]); + inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]); } inst->saturate = instr->dest.saturate; break; case nir_op_fddx_fine: - inst = emit(FS_OPCODE_DDX_FINE, result, op[0]); + inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fddx_coarse: - inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]); + inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fddy: if (fs_key->high_quality_derivatives) { - inst = emit(FS_OPCODE_DDY_FINE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], + fs_reg(fs_key->render_to_fbo)); } else { - inst = emit(FS_OPCODE_DDY_COARSE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], + fs_reg(fs_key->render_to_fbo)); } inst->saturate = instr->dest.saturate; break; case nir_op_fddy_fine: - inst = emit(FS_OPCODE_DDY_FINE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], + fs_reg(fs_key->render_to_fbo)); inst->saturate = instr->dest.saturate; break; case nir_op_fddy_coarse: - inst = emit(FS_OPCODE_DDY_COARSE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], + fs_reg(fs_key->render_to_fbo)); inst->saturate = instr->dest.saturate; break; case nir_op_fadd: case nir_op_iadd: - inst = emit(ADD(result, op[0], op[1])); + inst = bld.ADD(result, op[0], op[1]); inst->saturate = instr->dest.saturate; break; case 
nir_op_fmul: - inst = emit(MUL(result, op[0], op[1])); + inst = bld.MUL(result, op[0], op[1]); inst->saturate = instr->dest.saturate; break; case nir_op_imul: - emit(MUL(result, op[0], op[1])); + bld.MUL(result, op[0], op[1]); break; case nir_op_imul_high: @@ -797,8 +797,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type); - fs_inst *mul = emit(MUL(acc, op[0], op[1])); - emit(MACH(result, op[0], op[1])); + fs_inst *mul = bld.MUL(acc, op[0], op[1]); + bld.MACH(result, op[0], op[1]); /* Until Gen8, integer multiplies read 32-bits from one source, and * 16-bits from the other, and relying on the MACH instruction to @@ -826,7 +826,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_idiv: case nir_op_udiv: - emit_math(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]); + bld.emit(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]); break; case nir_op_uadd_carry: { @@ -836,8 +836,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) struct brw_reg acc = retype(brw_acc_reg(dispatch_width), BRW_REGISTER_TYPE_UD); - emit(ADDC(reg_null_ud, op[0], op[1])); - emit(MOV(result, fs_reg(acc))); + bld.ADDC(bld.null_reg_ud(), op[0], op[1]); + bld.MOV(result, fs_reg(acc)); break; } @@ -848,63 +848,63 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) struct brw_reg acc = retype(brw_acc_reg(dispatch_width), BRW_REGISTER_TYPE_UD); - emit(SUBB(reg_null_ud, op[0], op[1])); - emit(MOV(result, fs_reg(acc))); + bld.SUBB(bld.null_reg_ud(), op[0], op[1]); + bld.MOV(result, fs_reg(acc)); break; } case nir_op_umod: - emit_math(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]); + bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]); break; case nir_op_flt: case nir_op_ilt: case nir_op_ult: - emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_L)); + bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_L); break; case nir_op_fge: case nir_op_ige: case nir_op_uge: - emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_GE)); + 
bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_GE); break; case nir_op_feq: case nir_op_ieq: - emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_Z)); + bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_Z); break; case nir_op_fne: case nir_op_ine: - emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ)); + bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ); break; case nir_op_inot: if (devinfo->gen >= 8) { resolve_source_modifiers(&op[0]); } - emit(NOT(result, op[0])); + bld.NOT(result, op[0]); break; case nir_op_ixor: if (devinfo->gen >= 8) { resolve_source_modifiers(&op[0]); resolve_source_modifiers(&op[1]); } - emit(XOR(result, op[0], op[1])); + bld.XOR(result, op[0], op[1]); break; case nir_op_ior: if (devinfo->gen >= 8) { resolve_source_modifiers(&op[0]); resolve_source_modifiers(&op[1]); } - emit(OR(result, op[0], op[1])); + bld.OR(result, op[0], op[1]); break; case nir_op_iand: if (devinfo->gen >= 8) { resolve_source_modifiers(&op[0]); resolve_source_modifiers(&op[1]); } - emit(AND(result, op[0], op[1])); + bld.AND(result, op[0], op[1]); break; case nir_op_fdot2: @@ -952,53 +952,53 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) unreachable("not reached: should be handled by ldexp_to_arith()"); case nir_op_fsqrt: - inst = emit_math(SHADER_OPCODE_SQRT, result, op[0]); + inst = bld.emit(SHADER_OPCODE_SQRT, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_frsq: - inst = emit_math(SHADER_OPCODE_RSQ, result, op[0]); + inst = bld.emit(SHADER_OPCODE_RSQ, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_b2i: - emit(AND(result, op[0], fs_reg(1))); + bld.AND(result, op[0], fs_reg(1)); break; case nir_op_b2f: - emit(AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], fs_reg(0x3f800000u))); + bld.AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], fs_reg(0x3f800000u)); break; case nir_op_f2b: - emit(CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); + bld.CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ); break; case nir_op_i2b: - 
emit(CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); + bld.CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ); break; case nir_op_ftrunc: - inst = emit(RNDZ(result, op[0])); + inst = bld.RNDZ(result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fceil: { op[0].negate = !op[0].negate; fs_reg temp = vgrf(glsl_type::float_type); - emit(RNDD(temp, op[0])); + bld.RNDD(temp, op[0]); temp.negate = true; - inst = emit(MOV(result, temp)); + inst = bld.MOV(result, temp); inst->saturate = instr->dest.saturate; break; } case nir_op_ffloor: - inst = emit(RNDD(result, op[0])); + inst = bld.RNDD(result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_ffract: - inst = emit(FRC(result, op[0])); + inst = bld.FRC(result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fround_even: - inst = emit(RNDE(result, op[0])); + inst = bld.RNDE(result, op[0]); inst->saturate = instr->dest.saturate; break; @@ -1006,11 +1006,11 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_imin: case nir_op_umin: if (devinfo->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]); + inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); inst->conditional_mod = BRW_CONDITIONAL_L; } else { - emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_L)); - inst = emit(SEL(result, op[0], op[1])); + bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L); + inst = bld.SEL(result, op[0], op[1]); inst->predicate = BRW_PREDICATE_NORMAL; } inst->saturate = instr->dest.saturate; @@ -1020,11 +1020,11 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_imax: case nir_op_umax: if (devinfo->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]); + inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); inst->conditional_mod = BRW_CONDITIONAL_GE; } else { - emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_GE)); - inst = emit(SEL(result, op[0], op[1])); + bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE); + inst = 
bld.SEL(result, op[0], op[1]); inst->predicate = BRW_PREDICATE_NORMAL; } inst->saturate = instr->dest.saturate; @@ -1043,57 +1043,57 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) unreachable("not reached: should be handled by lower_packing_builtins"); case nir_op_unpack_half_2x16_split_x: - inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]); + inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_unpack_half_2x16_split_y: - inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]); + inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fpow: - inst = emit_math(SHADER_OPCODE_POW, result, op[0], op[1]); + inst = bld.emit(SHADER_OPCODE_POW, result, op[0], op[1]); inst->saturate = instr->dest.saturate; break; case nir_op_bitfield_reverse: - emit(BFREV(result, op[0])); + bld.BFREV(result, op[0]); break; case nir_op_bit_count: - emit(CBIT(result, op[0])); + bld.CBIT(result, op[0]); break; case nir_op_ufind_msb: case nir_op_ifind_msb: { - emit(FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0])); + bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); /* FBH counts from the MSB side, while GLSL's findMSB() wants the count * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then * subtract the result from 31 to convert the MSB count into an LSB count. 
*/ - emit(CMP(reg_null_d, result, fs_reg(-1), BRW_CONDITIONAL_NZ)); + bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ); fs_reg neg_result(result); neg_result.negate = true; - inst = emit(ADD(result, neg_result, fs_reg(31))); + inst = bld.ADD(result, neg_result, fs_reg(31)); inst->predicate = BRW_PREDICATE_NORMAL; break; } case nir_op_find_lsb: - emit(FBL(result, op[0])); + bld.FBL(result, op[0]); break; case nir_op_ubitfield_extract: case nir_op_ibitfield_extract: - emit(BFE(result, op[2], op[1], op[0])); + bld.BFE(result, op[2], op[1], op[0]); break; case nir_op_bfm: - emit(BFI1(result, op[0], op[1])); + bld.BFI1(result, op[0], op[1]); break; case nir_op_bfi: - emit(BFI2(result, op[0], op[1], op[2])); + bld.BFI2(result, op[0], op[1], op[2]); break; case nir_op_bitfield_insert: @@ -1101,26 +1101,26 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) "lower_instructions::bitfield_insert_to_bfm_bfi"); case nir_op_ishl: - emit(SHL(result, op[0], op[1])); + bld.SHL(result, op[0], op[1]); break; case nir_op_ishr: - emit(ASR(result, op[0], op[1])); + bld.ASR(result, op[0], op[1]); break; case nir_op_ushr: - emit(SHR(result, op[0], op[1])); + bld.SHR(result, op[0], op[1]); break; case nir_op_pack_half_2x16_split: - emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]); + bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]); break; case nir_op_ffma: - inst = emit(MAD(result, op[2], op[1], op[0])); + inst = bld.MAD(result, op[2], op[1], op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_flrp: - inst = emit_lrp(result, op[0], op[1], op[2]); + inst = bld.LRP(result, op[0], op[1], op[2]); inst->saturate = instr->dest.saturate; break; @@ -1128,8 +1128,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) if (optimize_frontfacing_ternary(instr, result)) return; - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - inst = emit(SEL(result, op[1], op[2])); + bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_NZ); + inst 
= bld.SEL(result, op[1], op[2]); inst->predicate = BRW_PREDICATE_NORMAL; break; @@ -1143,9 +1143,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) if (devinfo->gen <= 5 && (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) { fs_reg masked = vgrf(glsl_type::int_type); - emit(AND(masked, result, fs_reg(1))); + bld.AND(masked, result, fs_reg(1)); masked.negate = true; - emit(MOV(retype(result, BRW_REGISTER_TYPE_D), masked)); + bld.MOV(retype(result, BRW_REGISTER_TYPE_D), masked); } } From 979fe2ffee3956186017fe6c115aed53fc87ad3d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:01:32 +0300 Subject: [PATCH 515/834] i965/fs: Migrate translation of NIR intrinsics to the IR builder. v2: Use fs_builder::SEL instead of ::emit. Use set_condmod(). Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.h | 3 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 80 ++++++++++++------------ 2 files changed, 42 insertions(+), 41 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 44a0d319acb..338c8168778 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -317,7 +317,8 @@ public: void nir_emit_block(nir_block *block); void nir_emit_instr(nir_instr *instr); void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr); - void nir_emit_intrinsic(nir_intrinsic_instr *instr); + void nir_emit_intrinsic(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); void nir_emit_texture(nir_tex_instr *instr); void nir_emit_jump(const brw::fs_builder &bld, nir_jump_instr *instr); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index cd368809318..46448fc4aa0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -452,7 +452,7 @@ fs_visitor::nir_emit_instr(nir_instr *instr) break; case nir_instr_type_intrinsic: - 
nir_emit_intrinsic(nir_instr_as_intrinsic(instr)); + nir_emit_intrinsic(abld, nir_instr_as_intrinsic(instr)); break; case nir_instr_type_tex: @@ -1221,7 +1221,7 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst, } void -fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) +fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) @@ -1239,12 +1239,12 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ fs_inst *cmp; if (instr->intrinsic == nir_intrinsic_discard_if) { - cmp = emit(CMP(reg_null_f, get_nir_src(instr->src[0]), - fs_reg(0), BRW_CONDITIONAL_Z)); + cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]), + fs_reg(0), BRW_CONDITIONAL_Z); } else { fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - cmp = emit(CMP(reg_null_f, some_reg, some_reg, BRW_CONDITIONAL_NZ)); + cmp = bld.CMP(bld.null_reg_f(), some_reg, some_reg, BRW_CONDITIONAL_NZ); } cmp->predicate = BRW_PREDICATE_NORMAL; cmp->flag_subreg = 1; @@ -1281,8 +1281,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } case nir_intrinsic_load_front_face: - emit(MOV(retype(dest, BRW_REGISTER_TYPE_D), - *emit_frontfacing_interpolation())); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), + *emit_frontfacing_interpolation()); break; case nir_intrinsic_load_vertex_id: @@ -1292,7 +1292,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]; assert(vertex_id.file != BAD_FILE); dest.type = vertex_id.type; - emit(MOV(dest, vertex_id)); + bld.MOV(dest, vertex_id); break; } @@ -1300,7 +1300,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX]; assert(base_vertex.file != BAD_FILE); dest.type = base_vertex.type; - emit(MOV(dest, base_vertex)); + bld.MOV(dest, base_vertex); break; } @@ -1308,7 +1308,7 
@@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID]; assert(instance_id.file != BAD_FILE); dest.type = instance_id.type; - emit(MOV(dest, instance_id)); + bld.MOV(dest, instance_id); break; } @@ -1316,7 +1316,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN]; assert(sample_mask_in.file != BAD_FILE); dest.type = sample_mask_in.type; - emit(MOV(dest, sample_mask_in)); + bld.MOV(dest, sample_mask_in); break; } @@ -1324,8 +1324,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS]; assert(sample_pos.file != BAD_FILE); dest.type = sample_pos.type; - emit(MOV(dest, sample_pos)); - emit(MOV(offset(dest, 1), offset(sample_pos, 1))); + bld.MOV(dest, sample_pos); + bld.MOV(offset(dest, 1), offset(sample_pos, 1)); break; } @@ -1333,7 +1333,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID]; assert(sample_id.file != BAD_FILE); dest.type = sample_id.type; - emit(MOV(dest, sample_id)); + bld.MOV(dest, sample_id); break; } @@ -1357,7 +1357,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); index++; - emit(MOV(dest, src)); + bld.MOV(dest, src); dest = offset(dest, 1); } break; @@ -1379,9 +1379,9 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) * from any live channel. */ surf_index = vgrf(glsl_type::uint_type); - emit(ADD(surf_index, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ubo_start))); - emit_uniformize(surf_index, surf_index); + bld.ADD(surf_index, get_nir_src(instr->src[0]), + fs_reg(stage_prog_data->binding_table.ubo_start)); + bld.emit_uniformize(surf_index, surf_index); /* Assume this may touch any UBO. 
It would be nice to provide * a tighter bound, but the array information is already lowered away. @@ -1394,9 +1394,9 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) if (has_indirect) { /* Turn the byte offset into a dword offset. */ fs_reg base_offset = vgrf(glsl_type::int_type); - emit(SHR(base_offset, retype(get_nir_src(instr->src[1]), - BRW_REGISTER_TYPE_D), - fs_reg(2))); + bld.SHR(base_offset, retype(get_nir_src(instr->src[1]), + BRW_REGISTER_TYPE_D), + fs_reg(2)); unsigned vec4_offset = instr->const_index[0] / 4; for (int i = 0; i < instr->num_components; i++) @@ -1407,8 +1407,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) packed_consts.type = dest.type; fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15); - emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, - surf_index, const_offset_reg); + bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, + surf_index, const_offset_reg); for (unsigned i = 0; i < instr->num_components; i++) { packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i); @@ -1418,7 +1418,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ assert(packed_consts.subreg_offset < 32); - emit(MOV(dest, packed_consts)); + bld.MOV(dest, packed_consts); dest = offset(dest, 1); } } @@ -1437,7 +1437,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); index++; - emit(MOV(dest, src)); + bld.MOV(dest, src); dest = offset(dest, 1); } break; @@ -1470,7 +1470,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ no16("interpolate_at_* not yet supported in SIMD16 mode."); - fs_reg dst_xy = vgrf(2); + fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); /* For most messages, we need one reg of ignored data; the hardware * requires mlen==1 even when there is no payload. 
in the per-slot @@ -1482,7 +1482,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_interp_var_at_centroid: - inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u)); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, + dst_xy, src, fs_reg(0u)); break; case nir_intrinsic_interp_var_at_sample: { @@ -1490,8 +1491,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); assert(const_sample); unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0; - inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, - fs_reg(msg_data)); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, + fs_reg(msg_data)); break; } @@ -1502,17 +1503,17 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf; unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf; - inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, - fs_reg(off_x | (off_y << 4))); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, + fs_reg(off_x | (off_y << 4))); } else { src = vgrf(glsl_type::ivec2_type); fs_reg offset_src = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F); for (int i = 0; i < 2; i++) { fs_reg temp = vgrf(glsl_type::float_type); - emit(MUL(temp, offset(offset_src, i), fs_reg(16.0f))); + bld.MUL(temp, offset(offset_src, i), fs_reg(16.0f)); fs_reg itemp = vgrf(glsl_type::int_type); - emit(MOV(itemp, temp)); /* float to int */ + bld.MOV(itemp, temp); /* float to int */ /* Clamp the upper end of the range to +7/16. 
* ARB_gpu_shader5 requires that we support a maximum offset @@ -1529,14 +1530,13 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) * implementation-dependent constant * FRAGMENT_INTERPOLATION_OFFSET_BITS" */ - - emit(BRW_OPCODE_SEL, offset(src, i), itemp, fs_reg(7)) - ->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */ + set_condmod(BRW_CONDITIONAL_L, + bld.SEL(offset(src, i), itemp, fs_reg(7))); } mlen = 2; - inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, - fs_reg(0u)); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, + fs_reg(0u)); } break; } @@ -1554,7 +1554,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg src = interp_reg(instr->variables[0]->var->data.location, j); src.type = dest.type; - emit(FS_OPCODE_LINTERP, dest, dst_xy, src); + bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src); dest = offset(dest, 1); } break; @@ -1572,7 +1572,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) if (has_indirect) src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1])); index++; - emit(MOV(new_dest, src)); + bld.MOV(new_dest, src); src = offset(src, 1); } break; From bf83a1a219af8bf82c3c721888bbe0dfc3eced34 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 21:02:57 +0300 Subject: [PATCH 516/834] i965/fs: Migrate translation of NIR texturing instructions to the IR builder. v2: Don't remove assignments of base_ir just yet. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.h | 3 ++- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 338c8168778..ef0256dce64 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -319,7 +319,8 @@ public: void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr); void nir_emit_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); - void nir_emit_texture(nir_tex_instr *instr); + void nir_emit_texture(const brw::fs_builder &bld, + nir_tex_instr *instr); void nir_emit_jump(const brw::fs_builder &bld, nir_jump_instr *instr); fs_reg get_nir_src(nir_src src); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 46448fc4aa0..372e606a1a1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -456,7 +456,7 @@ fs_visitor::nir_emit_instr(nir_instr *instr) break; case nir_instr_type_tex: - nir_emit_texture(nir_instr_as_tex(instr)); + nir_emit_texture(abld, nir_instr_as_tex(instr)); break; case nir_instr_type_load_const: @@ -1584,7 +1584,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } void -fs_visitor::nir_emit_texture(nir_tex_instr *instr) +fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { unsigned sampler = instr->sampler_index; fs_reg sampler_reg(sampler); @@ -1671,8 +1671,8 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr) /* Emit code to evaluate the actual indexing expression */ sampler_reg = vgrf(glsl_type::uint_type); - emit(ADD(sampler_reg, src, fs_reg(sampler))); - emit_uniformize(sampler_reg, sampler_reg); + bld.ADD(sampler_reg, src, fs_reg(sampler)); + bld.emit_uniformize(sampler_reg, sampler_reg); break; } From 76c8142d0af45ab9907ebc9cfd2855fa753735b6 Mon Sep 17 00:00:00 2001 
From: Francisco Jerez Date: Wed, 3 Jun 2015 18:15:22 +0300 Subject: [PATCH 517/834] i965/fs: Migrate test_fs_saturate_propagation to the IR builder. v2: Use set_saturate. Use fs_builder::OPCODE instead of ::emit. Reviewed-by: Matt Turner --- .../dri/i965/test_fs_saturate_propagation.cpp | 69 +++++++++---------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index 8b1fab06607..d5142f57872 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ -26,6 +26,8 @@ #include "brw_cfg.h" #include "program/program.h" +using namespace brw; + class saturate_propagation_test : public ::testing::Test { virtual void SetUp(); @@ -101,13 +103,13 @@ saturate_propagation(fs_visitor *v) TEST_F(saturate_propagation_test, basic) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1); - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; + bld.ADD(dst0, src0, src1); + set_saturate(true, bld.MOV(dst1, dst0)); /* = Before = * @@ -136,15 +138,15 @@ TEST_F(saturate_propagation_test, basic) TEST_F(saturate_propagation_test, other_non_saturated_use) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg dst2 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1); - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; - v->emit(BRW_OPCODE_ADD, dst2, dst0, src0); + bld.ADD(dst0, src0, src1); + set_saturate(true, bld.MOV(dst1, dst0)); + bld.ADD(dst2, dst0, src0); /* = Before = 
* @@ -174,14 +176,14 @@ TEST_F(saturate_propagation_test, other_non_saturated_use) TEST_F(saturate_propagation_test, predicated_instruction) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1) + bld.ADD(dst0, src0, src1) ->predicate = BRW_PREDICATE_NORMAL; - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; + set_saturate(true, bld.MOV(dst1, dst0)); /* = Before = * @@ -209,14 +211,14 @@ TEST_F(saturate_propagation_test, predicated_instruction) TEST_F(saturate_propagation_test, neg_mov_sat) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1); + bld.ADD(dst0, src0, src1); dst0.negate = true; - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; + set_saturate(true, bld.MOV(dst1, dst0)); /* = Before = * @@ -244,14 +246,14 @@ TEST_F(saturate_propagation_test, neg_mov_sat) TEST_F(saturate_propagation_test, abs_mov_sat) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1); + bld.ADD(dst0, src0, src1); dst0.abs = true; - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; + set_saturate(true, bld.MOV(dst1, dst0)); /* = Before = * @@ -279,16 +281,15 @@ TEST_F(saturate_propagation_test, abs_mov_sat) TEST_F(saturate_propagation_test, producer_saturates) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg dst2 = v->vgrf(glsl_type::float_type); fs_reg src0 
= v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1) - ->saturate = true; - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; - v->emit(BRW_OPCODE_MOV, dst2, dst0); + set_saturate(true, bld.ADD(dst0, src0, src1)); + set_saturate(true, bld.MOV(dst1, dst0)); + bld.MOV(dst2, dst0); /* = Before = * @@ -319,16 +320,15 @@ TEST_F(saturate_propagation_test, producer_saturates) TEST_F(saturate_propagation_test, intervening_saturating_copy) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg dst2 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1); - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; - v->emit(BRW_OPCODE_MOV, dst2, dst0) - ->saturate = true; + bld.ADD(dst0, src0, src1); + set_saturate(true, bld.MOV(dst1, dst0)); + set_saturate(true, bld.MOV(dst2, dst0)); /* = Before = * @@ -361,16 +361,16 @@ TEST_F(saturate_propagation_test, intervening_saturating_copy) TEST_F(saturate_propagation_test, intervening_dest_write) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::vec4_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::vec2_type); - v->emit(BRW_OPCODE_ADD, offset(dst0, 2), src0, src1); - v->emit(SHADER_OPCODE_TEX, dst0, src2) + bld.ADD(offset(dst0, 2), src0, src1); + bld.emit(SHADER_OPCODE_TEX, dst0, src2) ->regs_written = 4; - v->emit(BRW_OPCODE_MOV, dst1, offset(dst0, 2)) - ->saturate = true; + set_saturate(true, bld.MOV(dst1, offset(dst0, 2))); /* = Before = * @@ -401,18 +401,17 @@ TEST_F(saturate_propagation_test, intervening_dest_write) TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat) { + const fs_builder &bld = v->bld; 
fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg dst2 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_MUL, dst0, src0, src1); + bld.MUL(dst0, src0, src1); dst0.negate = true; - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; + set_saturate(true, bld.MOV(dst1, dst0)); dst0.negate = false; - v->emit(BRW_OPCODE_MOV, dst2, dst0) - ->saturate = true; + set_saturate(true, bld.MOV(dst2, dst0)); /* = Before = * From 51948085a2e5d97dbf2cd3c255a5873d509773eb Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 18:16:30 +0300 Subject: [PATCH 518/834] i965/fs: Migrate test_fs_cmod_propagation to the IR builder. v2: Use set_predicate/condmod. Use fs_builder::OPCODE instead of ::emit. Reviewed-by: Matt Turner --- .../dri/i965/test_fs_cmod_propagation.cpp | 102 +++++++++--------- 1 file changed, 50 insertions(+), 52 deletions(-) diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index 0e48e824711..7bb5c4a2fa2 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -26,6 +26,8 @@ #include "brw_cfg.h" #include "program/program.h" +using namespace brw; + class cmod_propagation_test : public ::testing::Test { virtual void SetUp(); @@ -101,13 +103,13 @@ cmod_propagation(fs_visitor *v) TEST_F(cmod_propagation_test, basic) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest, src0, src1); - v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -133,13 
+135,13 @@ TEST_F(cmod_propagation_test, basic) TEST_F(cmod_propagation_test, cmp_nonzero) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg nonzero(1.0f); - v->emit(BRW_OPCODE_ADD, dest, src0, src1); - v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, nonzero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), dest, nonzero, BRW_CONDITIONAL_GE); /* = Before = * @@ -166,12 +168,12 @@ TEST_F(cmod_propagation_test, cmp_nonzero) TEST_F(cmod_propagation_test, non_cmod_instruction) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::uint_type); fs_reg src0 = v->vgrf(glsl_type::uint_type); fs_reg zero(0u); - v->emit(BRW_OPCODE_FBL, dest, src0); - v->emit(BRW_OPCODE_CMP, v->reg_null_ud, dest, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.FBL(dest, src0); + bld.CMP(bld.null_reg_ud(), dest, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -198,16 +200,15 @@ TEST_F(cmod_propagation_test, non_cmod_instruction) TEST_F(cmod_propagation_test, intervening_flag_write) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest, src0, src1); - v->emit(BRW_OPCODE_CMP, v->reg_null_f, src2, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; - v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE); + bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -237,17 +238,16 @@ TEST_F(cmod_propagation_test, intervening_flag_write) TEST_F(cmod_propagation_test, intervening_flag_read) { + const fs_builder &bld = v->bld; fs_reg dest0 = 
v->vgrf(glsl_type::float_type); fs_reg dest1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest0, src0, src1); - v->emit(BRW_OPCODE_SEL, dest1, src2, zero) - ->predicate = BRW_PREDICATE_NORMAL; - v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest0, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.ADD(dest0, src0, src1); + set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); + bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -277,16 +277,16 @@ TEST_F(cmod_propagation_test, intervening_flag_read) TEST_F(cmod_propagation_test, intervening_dest_write) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::vec4_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::vec2_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, offset(dest, 2), src0, src1); - v->emit(SHADER_OPCODE_TEX, dest, src2) + bld.ADD(offset(dest, 2), src0, src1); + bld.emit(SHADER_OPCODE_TEX, dest, src2) ->regs_written = 4; - v->emit(BRW_OPCODE_CMP, v->reg_null_f, offset(dest, 2), zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.CMP(bld.null_reg_f(), offset(dest, 2), zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -317,18 +317,16 @@ TEST_F(cmod_propagation_test, intervening_dest_write) TEST_F(cmod_propagation_test, intervening_flag_read_same_value) { + const fs_builder &bld = v->bld; fs_reg dest0 = v->vgrf(glsl_type::float_type); fs_reg dest1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest0, src0, src1) - ->conditional_mod = BRW_CONDITIONAL_GE; - v->emit(BRW_OPCODE_SEL, dest1, src2, zero) - ->predicate = BRW_PREDICATE_NORMAL; - 
v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest0, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1)); + set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); + bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -358,14 +356,14 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value) TEST_F(cmod_propagation_test, negate) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest, src0, src1); + bld.ADD(dest, src0, src1); dest.negate = true; - v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -391,13 +389,13 @@ TEST_F(cmod_propagation_test, negate) TEST_F(cmod_propagation_test, movnz) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_CMP, dest, src0, src1) - ->conditional_mod = BRW_CONDITIONAL_GE; - v->emit(BRW_OPCODE_MOV, v->reg_null_f, dest) - ->conditional_mod = BRW_CONDITIONAL_NZ; + bld.CMP(dest, src0, src1, BRW_CONDITIONAL_GE); + set_condmod(BRW_CONDITIONAL_NZ, + bld.MOV(bld.null_reg_f(), dest)); /* = Before = * @@ -423,14 +421,14 @@ TEST_F(cmod_propagation_test, movnz) TEST_F(cmod_propagation_test, different_types_cmod_with_zero) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::int_type); fs_reg src1 = v->vgrf(glsl_type::int_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest, src0, src1); - v->emit(BRW_OPCODE_CMP, v->reg_null_f, retype(dest, BRW_REGISTER_TYPE_F), - zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.ADD(dest, src0, src1); + 
bld.CMP(bld.null_reg_f(), retype(dest, BRW_REGISTER_TYPE_F), zero, + BRW_CONDITIONAL_GE); /* = Before = * @@ -457,15 +455,15 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero) TEST_F(cmod_propagation_test, andnz_one) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); fs_reg one(1); - v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) - ->conditional_mod = BRW_CONDITIONAL_L; - v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, one) - ->conditional_mod = BRW_CONDITIONAL_NZ; + bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_NZ, + bld.AND(bld.null_reg_d(), dest, one)); /* = Before = * 0: cmp.l.f0(8) dest:F src0:F 0F @@ -492,15 +490,15 @@ TEST_F(cmod_propagation_test, andnz_one) TEST_F(cmod_propagation_test, andnz_non_one) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); fs_reg nonone(38); - v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) - ->conditional_mod = BRW_CONDITIONAL_L; - v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, nonone) - ->conditional_mod = BRW_CONDITIONAL_NZ; + bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_NZ, + bld.AND(bld.null_reg_d(), dest, nonone)); /* = Before = * 0: cmp.l.f0(8) dest:F src0:F 0F @@ -527,15 +525,15 @@ TEST_F(cmod_propagation_test, andnz_non_one) TEST_F(cmod_propagation_test, andz_one) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); fs_reg one(1); - v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) - ->conditional_mod = BRW_CONDITIONAL_L; - v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, one) - ->conditional_mod = BRW_CONDITIONAL_Z; + bld.CMP(retype(dest, 
BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_Z, + bld.AND(bld.null_reg_d(), dest, one)); /* = Before = * 0: cmp.l.f0(8) dest:F src0:F 0F From 44928b799adbbf2671c482431b3b7a390118725c Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 8 Jun 2015 19:32:18 +0300 Subject: [PATCH 519/834] i965/fs: Remove dead IR construction code from the visitor. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 284 ------------------- src/mesa/drivers/dri/i965/brw_fs.h | 73 ----- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 3 - src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 79 ------ 4 files changed, 439 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 0647d8f8f97..7789ca77e57 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -214,143 +214,6 @@ fs_inst::resize_sources(uint8_t num_sources) } } -#define ALU1(op) \ - fs_inst * \ - fs_visitor::op(const fs_reg &dst, const fs_reg &src0) \ - { \ - return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0); \ - } - -#define ALU2(op) \ - fs_inst * \ - fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \ - const fs_reg &src1) \ - { \ - return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \ - } - -#define ALU2_ACC(op) \ - fs_inst * \ - fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \ - const fs_reg &src1) \ - { \ - fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\ - inst->writes_accumulator = true; \ - return inst; \ - } - -#define ALU3(op) \ - fs_inst * \ - fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \ - const fs_reg &src1, const fs_reg &src2) \ - { \ - return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\ - } - -ALU1(NOT) -ALU1(MOV) -ALU1(FRC) -ALU1(RNDD) -ALU1(RNDE) -ALU1(RNDZ) -ALU2(ADD) -ALU2(MUL) -ALU2_ACC(MACH) -ALU2(AND) -ALU2(OR) -ALU2(XOR) -ALU2(SHL) -ALU2(SHR) -ALU2(ASR) -ALU3(LRP) -ALU1(BFREV) -ALU3(BFE) 
-ALU2(BFI1) -ALU3(BFI2) -ALU1(FBH) -ALU1(FBL) -ALU1(CBIT) -ALU3(MAD) -ALU2_ACC(ADDC) -ALU2_ACC(SUBB) -ALU2(SEL) -ALU2(MAC) - -/** Gen4 predicated IF. */ -fs_inst * -fs_visitor::IF(enum brw_predicate predicate) -{ - fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width); - inst->predicate = predicate; - return inst; -} - -/** Gen6 IF with embedded comparison. */ -fs_inst * -fs_visitor::IF(const fs_reg &src0, const fs_reg &src1, - enum brw_conditional_mod condition) -{ - assert(devinfo->gen == 6); - fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width, - reg_null_d, src0, src1); - inst->conditional_mod = condition; - return inst; -} - -/** - * CMP: Sets the low bit of the destination channels with the result - * of the comparison, while the upper bits are undefined, and updates - * the flag register with the packed 16 bits of the result. - */ -fs_inst * -fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, - enum brw_conditional_mod condition) -{ - fs_inst *inst; - - /* Take the instruction: - * - * CMP null src0 src1 - * - * Original gen4 does type conversion to the destination type before - * comparison, producing garbage results for floating point comparisons. - * - * The destination type doesn't matter on newer generations, so we set the - * type to match src0 so we can compact the instruction. 
- */ - dst.type = src0.type; - if (dst.file == HW_REG) - dst.fixed_hw_reg.type = dst.type; - - resolve_ud_negate(&src0); - resolve_ud_negate(&src1); - - inst = new(mem_ctx) fs_inst(BRW_OPCODE_CMP, dst, src0, src1); - inst->conditional_mod = condition; - - return inst; -} - -fs_inst * -fs_visitor::LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources, - int header_size) -{ - assert(dst.width % 8 == 0); - fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, dst.width, - dst, src, sources); - inst->header_size = header_size; - - for (int i = 0; i < header_size; i++) - assert(src[i].file != GRF || src[i].width * type_sz(src[i].type) == 32); - inst->regs_written = header_size; - - for (int i = header_size; i < sources; ++i) - assert(src[i].file != GRF || src[i].width == dst.width); - inst->regs_written += (sources - header_size) * (dst.width / 8); - - return inst; -} - void fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, const fs_reg &dst, @@ -862,45 +725,6 @@ fs_visitor::no16(const char *format, ...) 
va_end(va); } -fs_inst * -fs_visitor::emit(enum opcode opcode) -{ - return emit(new(mem_ctx) fs_inst(opcode, dispatch_width)); -} - -fs_inst * -fs_visitor::emit(enum opcode opcode, const fs_reg &dst) -{ - return emit(new(mem_ctx) fs_inst(opcode, dst)); -} - -fs_inst * -fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0) -{ - return emit(new(mem_ctx) fs_inst(opcode, dst, src0)); -} - -fs_inst * -fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1) -{ - return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1)); -} - -fs_inst * -fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1, const fs_reg &src2) -{ - return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1, src2)); -} - -fs_inst * -fs_visitor::emit(enum opcode opcode, const fs_reg &dst, - fs_reg src[], int sources) -{ - return emit(new(mem_ctx) fs_inst(opcode, dst, src, sources)); -} - /** * Returns true if the instruction has a flag that means it won't * update an entire destination register. @@ -1053,14 +877,6 @@ fs_visitor::vgrf(const glsl_type *const type) brw_type_for_base_type(type), dispatch_width); } -fs_reg -fs_visitor::vgrf(int num_components) -{ - int reg_width = dispatch_width / 8; - return fs_reg(GRF, alloc.allocate(num_components * reg_width), - BRW_REGISTER_TYPE_F, dispatch_width); -} - /** Fixed HW reg constructor. */ fs_reg::fs_reg(enum register_file file, int reg) { @@ -1487,106 +1303,6 @@ fs_visitor::resolve_source_modifiers(fs_reg *src) *src = temp; } -fs_reg -fs_visitor::fix_math_operand(fs_reg src) -{ - /* Can't do hstride == 0 args on gen6 math, so expand it out. We - * might be able to do better by doing execsize = 1 math and then - * expanding that result out, but we would need to be careful with - * masking. - * - * The hardware ignores source modifiers (negate and abs) on math - * instructions, so we also move to a temp to set those up. 
- */ - if (devinfo->gen == 6 && src.file != UNIFORM && src.file != IMM && - !src.abs && !src.negate) - return src; - - /* Gen7 relaxes most of the above restrictions, but still can't use IMM - * operands to math - */ - if (devinfo->gen >= 7 && src.file != IMM) - return src; - - fs_reg expanded = vgrf(glsl_type::float_type); - expanded.type = src.type; - emit(BRW_OPCODE_MOV, expanded, src); - return expanded; -} - -fs_inst * -fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) -{ - switch (opcode) { - case SHADER_OPCODE_RCP: - case SHADER_OPCODE_RSQ: - case SHADER_OPCODE_SQRT: - case SHADER_OPCODE_EXP2: - case SHADER_OPCODE_LOG2: - case SHADER_OPCODE_SIN: - case SHADER_OPCODE_COS: - break; - default: - unreachable("not reached: bad math opcode"); - } - - /* Can't do hstride == 0 args to gen6 math, so expand it out. We - * might be able to do better by doing execsize = 1 math and then - * expanding that result out, but we would need to be careful with - * masking. - * - * Gen 6 hardware ignores source modifiers (negate and abs) on math - * instructions, so we also move to a temp to set those up. - */ - if (devinfo->gen == 6 || devinfo->gen == 7) - src = fix_math_operand(src); - - fs_inst *inst = emit(opcode, dst, src); - - if (devinfo->gen < 6) { - inst->base_mrf = 2; - inst->mlen = dispatch_width / 8; - } - - return inst; -} - -fs_inst * -fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) -{ - int base_mrf = 2; - fs_inst *inst; - - if (devinfo->gen >= 8) { - inst = emit(opcode, dst, src0, src1); - } else if (devinfo->gen >= 6) { - src0 = fix_math_operand(src0); - src1 = fix_math_operand(src1); - - inst = emit(opcode, dst, src0, src1); - } else { - /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 - * "Message Payload": - * - * "Operand0[7]. For the INT DIV functions, this operand is the - * denominator." - * ... - * "Operand1[7]. For the INT DIV functions, this operand is the - * numerator." 
- */ - bool is_int_div = opcode != SHADER_OPCODE_POW; - fs_reg &op0 = is_int_div ? src1 : src0; - fs_reg &op1 = is_int_div ? src0 : src1; - - emit(MOV(fs_reg(MRF, base_mrf + 1, op1.type, dispatch_width), op1)); - inst = emit(opcode, dst, op0, reg_null_f); - - inst->base_mrf = base_mrf; - inst->mlen = 2 * dispatch_width / 8; - } - return inst; -} - void fs_visitor::emit_discard_jump() { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index ef0256dce64..225a9735919 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -70,10 +70,6 @@ namespace brw { class fs_visitor : public backend_shader { public: - const fs_reg reg_null_f; - const fs_reg reg_null_d; - const fs_reg reg_null_ud; - fs_visitor(struct brw_context *brw, void *mem_ctx, gl_shader_stage stage, @@ -86,7 +82,6 @@ public: ~fs_visitor(); fs_reg vgrf(const glsl_type *const type); - fs_reg vgrf(int num_components); void import_uniforms(fs_visitor *v); void setup_uniform_clipplane_values(); void compute_clip_distance(); @@ -95,65 +90,11 @@ public: void swizzle_result(ir_texture_opcode op, int dest_components, fs_reg orig_val, uint32_t sampler); - fs_inst *emit(fs_inst *inst); - void emit(exec_list list); - - fs_inst *emit(enum opcode opcode); - fs_inst *emit(enum opcode opcode, const fs_reg &dst); - fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0); - fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1); - fs_inst *emit(enum opcode opcode, const fs_reg &dst, - const fs_reg &src0, const fs_reg &src1, const fs_reg &src2); - fs_inst *emit(enum opcode opcode, const fs_reg &dst, - fs_reg src[], int sources); - - fs_inst *MOV(const fs_reg &dst, const fs_reg &src); - fs_inst *NOT(const fs_reg &dst, const fs_reg &src); - fs_inst *RNDD(const fs_reg &dst, const fs_reg &src); - fs_inst *RNDE(const fs_reg &dst, const fs_reg &src); - fs_inst *RNDZ(const fs_reg &dst, const fs_reg &src); - 
fs_inst *FRC(const fs_reg &dst, const fs_reg &src); - fs_inst *ADD(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *MUL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *MACH(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *MAC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *SHL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *SHR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *ASR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *AND(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *OR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *XOR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *IF(enum brw_predicate predicate); - fs_inst *IF(const fs_reg &src0, const fs_reg &src1, - enum brw_conditional_mod condition); - fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1, - enum brw_conditional_mod condition); - fs_inst *LRP(const fs_reg &dst, const fs_reg &a, const fs_reg &y, - const fs_reg &x); - fs_inst *BFREV(const fs_reg &dst, const fs_reg &value); - fs_inst *BFE(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset, - const fs_reg &value); - fs_inst *BFI1(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset); - fs_inst *BFI2(const fs_reg &dst, const fs_reg &bfi1_dst, - const fs_reg &insert, const fs_reg &base); - fs_inst *FBH(const fs_reg &dst, const fs_reg &value); - fs_inst *FBL(const fs_reg &dst, const fs_reg &value); - fs_inst *CBIT(const fs_reg &dst, const fs_reg &value); - fs_inst *MAD(const fs_reg &dst, const fs_reg &c, const fs_reg &b, - const fs_reg &a); - fs_inst *ADDC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *SUBB(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *SEL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - int type_size(const struct glsl_type *type); fs_inst 
*get_instruction_generating_reg(fs_inst *start, fs_inst *end, const fs_reg &reg); - fs_inst *LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources, - int header_size); - void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld, const fs_reg &dst, const fs_reg &surf_index, @@ -284,14 +225,7 @@ public: fs_reg emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler); void emit_gen6_gather_wa(uint8_t wa, fs_reg dst); void resolve_source_modifiers(fs_reg *src); - fs_reg fix_math_operand(fs_reg src); - fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); - fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); - fs_inst *emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y, - const fs_reg &a); void emit_discard_jump(); - /** Copy any live channel from \p src to the first channel of \p dst. */ - void emit_uniformize(const fs_reg &dst, const fs_reg &src); bool try_replace_with_sel(); bool opt_peephole_sel(); bool opt_peephole_predicated_break(); @@ -354,8 +288,6 @@ public: void emit_untyped_surface_read(unsigned surf_index, fs_reg dst, fs_reg offset); - void resolve_ud_negate(fs_reg *reg); - fs_reg get_timestamp(const brw::fs_builder &bld); struct brw_reg interp_reg(int location, int channel); @@ -417,11 +349,6 @@ public: fs_reg nir_outputs; fs_reg *nir_system_values; - /** @{ debug annotation info */ - const char *current_annotation; - const void *base_ir; - /** @} */ - bool failed; char *fail_msg; bool simd16_unsupported; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 372e606a1a1..0f1a2860496 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -444,7 +444,6 @@ void fs_visitor::nir_emit_instr(nir_instr *instr) { const fs_builder abld = bld.annotate(NULL, instr); - this->base_ir = instr; switch (instr->type) { case nir_instr_type_alu: @@ -472,8 +471,6 @@ fs_visitor::nir_emit_instr(nir_instr *instr) default: 
unreachable("unknown instruction type"); } - - this->base_ir = NULL; } static brw_reg_type diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index afd7a5ee4dd..c41284b0a5e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -77,44 +77,6 @@ fs_visitor::emit_vs_system_value(int location) return reg; } -fs_inst * -fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y, - const fs_reg &a) -{ - if (devinfo->gen < 6) { - /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */ - fs_reg y_times_a = vgrf(glsl_type::float_type); - fs_reg one_minus_a = vgrf(glsl_type::float_type); - fs_reg x_times_one_minus_a = vgrf(glsl_type::float_type); - - emit(MUL(y_times_a, y, a)); - - fs_reg negative_a = a; - negative_a.negate = !a.negate; - emit(ADD(one_minus_a, negative_a, fs_reg(1.0f))); - emit(MUL(x_times_one_minus_a, x, one_minus_a)); - - return emit(ADD(dst, x_times_one_minus_a, y_times_a)); - } else { - /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so - * we need to reorder the operands. 
- */ - return emit(LRP(dst, a, y, x)); - } -} - -void -fs_visitor::emit_uniformize(const fs_reg &dst, const fs_reg &src) -{ - const fs_reg chan_index = vgrf(glsl_type::uint_type); - - emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0)) - ->force_writemask_all = true; - emit(SHADER_OPCODE_BROADCAST, component(dst, 0), - src, component(chan_index, 0)) - ->force_writemask_all = true; -} - fs_inst * fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, fs_reg coordinate, int coord_components, @@ -1264,29 +1226,6 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, inst->mlen = mlen; } -fs_inst * -fs_visitor::emit(fs_inst *inst) -{ - if (dispatch_width == 16 && inst->exec_size == 8) - inst->force_uncompressed = true; - - inst->annotation = this->current_annotation; - inst->ir = this->base_ir; - - this->instructions.push_tail(inst); - - return inst; -} - -void -fs_visitor::emit(exec_list list) -{ - foreach_in_list_safe(fs_inst, inst, &list) { - inst->exec_node::remove(); - emit(inst); - } -} - /** Emits a dummy fragment shader consisting of magenta for bringup purposes. 
*/ void fs_visitor::emit_dummy_fs() @@ -1992,18 +1931,6 @@ fs_visitor::emit_urb_writes() } } -void -fs_visitor::resolve_ud_negate(fs_reg *reg) -{ - if (reg->type != BRW_REGISTER_TYPE_UD || - !reg->negate) - return; - - fs_reg temp = vgrf(glsl_type::uint_type); - emit(MOV(temp, *reg)); - *reg = temp; -} - void fs_visitor::emit_cs_terminate() { @@ -2034,9 +1961,6 @@ fs_visitor::fs_visitor(struct brw_context *brw, struct gl_program *prog, unsigned dispatch_width) : backend_shader(brw, shader_prog, prog, prog_data, stage), - reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)), - reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), - reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), key(key), prog_data(prog_data), dispatch_width(dispatch_width), promoted_constants(0), bld(fs_builder(this, dispatch_width).at_end()) @@ -2073,9 +1997,6 @@ fs_visitor::fs_visitor(struct brw_context *brw, this->first_non_payload_grf = 0; this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; - this->current_annotation = NULL; - this->base_ir = NULL; - this->virtual_grf_start = NULL; this->virtual_grf_end = NULL; this->live_intervals = NULL; From 698c391521561a1f0e4ff2570e35417be9968eaa Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 3 Jun 2015 18:17:50 +0300 Subject: [PATCH 520/834] i965/fs: Drop fs_inst::force_uncompressed. This is now unused. Saves a whole bit of memory per instruction. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_ir_fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index eee4d7e1e00..96dc20da3cf 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -256,7 +256,6 @@ public: uint8_t exec_size; bool eot:1; - bool force_uncompressed:1; bool force_sechalf:1; bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */ }; From 65bd4159b35c7213e0ac27c6299495e08a105ab4 Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Tue, 9 Jun 2015 14:43:21 +0100 Subject: [PATCH 521/834] rtasm: Generalize executable memory allocator to all Unices. We're only using fairly portable standard Unix calls here, so might as well save ourselves future trouble by enabling on all Unices by default. https://bugs.freedesktop.org/show_bug.cgi?id=90904 Reviewed-by: Brian Paul Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 8c3dbefd109..f7e605e9563 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -49,7 +49,7 @@ #include #endif -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_CYGWIN) +#if defined(PIPE_OS_UNIX) /* From 86e5afbfee5492235cab1a7be4ea49ac02be1644 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 6 Jun 2015 12:15:30 -0700 Subject: [PATCH 522/834] i965/fs: Don't let the EOT send message interfere with the MRF hack Previously, we just put the message for the EOT send as high in the file as it would go. This is because the register pre-filling hardware will stomp all over the early registers in the file in preparation for the next thread while you're still sending the last message. 
However, if something happens to spill, then the MRF hack interferes with the EOT send message and, if things aren't scheduled nicely, will stomp on it. Cc: "10.5 10.6" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90520 Reviewed-by: Neil Roberts --- src/mesa/drivers/dri/i965/brw_fs.h | 3 ++- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 225a9735919..39e9503c5fd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -123,7 +123,8 @@ public: void setup_payload_interference(struct ra_graph *g, int payload_reg_count, int first_payload_node); void setup_mrf_hack_interference(struct ra_graph *g, - int first_mrf_hack_node); + int first_mrf_hack_node, + int *first_used_mrf); int choose_spill_reg(struct ra_graph *g); void spill_reg(int spill_reg); void split_virtual_grfs(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 3faf49ab8aa..62e27cc8061 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -503,11 +503,13 @@ fs_visitor::get_used_mrfs(bool *mrf_used) * messages (treated as MRFs in code generation). */ void -fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node) +fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node, + int *first_used_mrf) { bool mrf_used[BRW_MAX_MRF]; get_used_mrfs(mrf_used); + *first_used_mrf = BRW_MAX_MRF; for (int i = 0; i < BRW_MAX_MRF; i++) { /* Mark each MRF reg node as being allocated to its physical register. * @@ -520,6 +522,9 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node) * that are used as conflicting with all virtual GRFs. 
*/ if (mrf_used[i]) { + if (i < *first_used_mrf) + *first_used_mrf = i; + for (unsigned j = 0; j < this->alloc.count; j++) { ra_add_node_interference(g, first_mrf_node + i, j); } @@ -586,7 +591,8 @@ fs_visitor::assign_regs(bool allow_spilling) setup_payload_interference(g, payload_node_count, first_payload_node); if (devinfo->gen >= 7) { - setup_mrf_hack_interference(g, first_mrf_hack_node); + int first_used_mrf = BRW_MAX_MRF; + setup_mrf_hack_interference(g, first_mrf_hack_node, &first_used_mrf); foreach_block_and_inst(block, fs_inst, inst, cfg) { /* When we do send-from-GRF for FB writes, we need to ensure that @@ -602,6 +608,13 @@ fs_visitor::assign_regs(bool allow_spilling) if (inst->eot) { int size = alloc.sizes[inst->src[0].reg]; int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1; + + /* If something happened to spill, we want to push the EOT send + * register early enough in the register file that we don't + * conflict with any used MRF hack registers. + */ + reg -= BRW_MAX_MRF - first_used_mrf; + ra_set_node_reg(g, inst->src[0].reg, reg); break; } From 670862a5069f2759418450698aa4ab7d9f0e079f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 6 Jun 2015 12:08:00 -0700 Subject: [PATCH 523/834] fs/reg_allocate: Remove the MRF hack helpers from fs_visitor These are helpers that only exist in this one file. No reason to put them in the visitor. 
Reviewed-by: Neil Roberts --- src/mesa/drivers/dri/i965/brw_fs.h | 4 --- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 25 ++++++++++--------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 39e9503c5fd..ca887ec0b37 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -119,12 +119,8 @@ public: void assign_vs_urb_setup(); bool assign_regs(bool allow_spilling); void assign_regs_trivial(); - void get_used_mrfs(bool *mrf_used); void setup_payload_interference(struct ra_graph *g, int payload_reg_count, int first_payload_node); - void setup_mrf_hack_interference(struct ra_graph *g, - int first_mrf_hack_node, - int *first_used_mrf); int choose_spill_reg(struct ra_graph *g); void spill_reg(int spill_reg); void split_virtual_grfs(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 62e27cc8061..cd78816b9f2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -470,14 +470,14 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, * see if we can actually use MRFs to do spills without overwriting normal MRF * contents. 
*/ -void -fs_visitor::get_used_mrfs(bool *mrf_used) +static void +get_used_mrfs(fs_visitor *v, bool *mrf_used) { - int reg_width = dispatch_width / 8; + int reg_width = v->dispatch_width / 8; memset(mrf_used, 0, BRW_MAX_MRF * sizeof(bool)); - foreach_block_and_inst(block, fs_inst, inst, cfg) { + foreach_block_and_inst(block, fs_inst, inst, v->cfg) { if (inst->dst.file == MRF) { int reg = inst->dst.reg & ~BRW_MRF_COMPR4; mrf_used[reg] = true; @@ -491,7 +491,7 @@ fs_visitor::get_used_mrfs(bool *mrf_used) } if (inst->mlen > 0) { - for (int i = 0; i < implied_mrf_writes(inst); i++) { + for (int i = 0; i < v->implied_mrf_writes(inst); i++) { mrf_used[inst->base_mrf + i] = true; } } @@ -502,12 +502,12 @@ fs_visitor::get_used_mrfs(bool *mrf_used) * Sets interference between virtual GRFs and usage of the high GRFs for SEND * messages (treated as MRFs in code generation). */ -void -fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node, - int *first_used_mrf) +static void +setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g, + int first_mrf_node, int *first_used_mrf) { bool mrf_used[BRW_MAX_MRF]; - get_used_mrfs(mrf_used); + get_used_mrfs(v, mrf_used); *first_used_mrf = BRW_MAX_MRF; for (int i = 0; i < BRW_MAX_MRF; i++) { @@ -525,7 +525,7 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node, if (i < *first_used_mrf) *first_used_mrf = i; - for (unsigned j = 0; j < this->alloc.count; j++) { + for (unsigned j = 0; j < v->alloc.count; j++) { ra_add_node_interference(g, first_mrf_node + i, j); } } @@ -592,7 +592,8 @@ fs_visitor::assign_regs(bool allow_spilling) setup_payload_interference(g, payload_node_count, first_payload_node); if (devinfo->gen >= 7) { int first_used_mrf = BRW_MAX_MRF; - setup_mrf_hack_interference(g, first_mrf_hack_node, &first_used_mrf); + setup_mrf_hack_interference(this, g, first_mrf_hack_node, + &first_used_mrf); foreach_block_and_inst(block, fs_inst, inst, cfg) { /* When we do send-from-GRF 
for FB writes, we need to ensure that @@ -853,7 +854,7 @@ fs_visitor::spill_reg(int spill_reg) */ if (!spilled_any_registers) { bool mrf_used[BRW_MAX_MRF]; - get_used_mrfs(mrf_used); + get_used_mrfs(this, mrf_used); for (int i = spill_base_mrf; i < BRW_MAX_MRF; i++) { if (mrf_used[i]) { From 37e0677870febefdd8b89be335f0e97bfd4a7c9b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 5 Jun 2015 08:00:09 -0600 Subject: [PATCH 524/834] mesa: remove some MAX_NV_FRAGMENT_PROGRAM_* macros GL_NV_fragment_program support was removed a while ago. This is just some clean-up. Reviewed-by: Matt Turner --- src/mesa/main/config.h | 13 ++----------- src/mesa/main/context.c | 4 ++-- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 5a66a4eec90..9c3baf4c6aa 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -213,19 +213,10 @@ /** For GL_ARB_fragment_program */ /*@{*/ #define MAX_FRAGMENT_PROGRAM_ADDRESS_REGS 0 +#define MAX_FRAGMENT_PROGRAM_PARAMS 64 +#define MAX_FRAGMENT_PROGRAM_INPUTS 12 /*@}*/ -/** For GL_NV_fragment_program */ -/*@{*/ -#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ -#define MAX_NV_FRAGMENT_PROGRAM_TEMPS 96 -#define MAX_NV_FRAGMENT_PROGRAM_PARAMS 64 -#define MAX_NV_FRAGMENT_PROGRAM_INPUTS 12 -#define MAX_NV_FRAGMENT_PROGRAM_OUTPUTS 3 -#define MAX_NV_FRAGMENT_PROGRAM_WRITE_ONLYS 2 -/*@}*/ - - /** For GL_ARB_vertex_shader */ /*@{*/ #define MAX_VERTEX_GENERIC_ATTRIBS 16 diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index db494ca73cc..79fa01849e0 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -489,8 +489,8 @@ init_program_limits(struct gl_constants *consts, gl_shader_stage stage, prog->MaxOutputComponents = 16 * 4; /* old limit not to break tnl and swrast */ break; case MESA_SHADER_FRAGMENT: - prog->MaxParameters = MAX_NV_FRAGMENT_PROGRAM_PARAMS; - prog->MaxAttribs = MAX_NV_FRAGMENT_PROGRAM_INPUTS; + 
prog->MaxParameters = MAX_FRAGMENT_PROGRAM_PARAMS; + prog->MaxAttribs = MAX_FRAGMENT_PROGRAM_INPUTS; prog->MaxAddressRegs = MAX_FRAGMENT_PROGRAM_ADDRESS_REGS; prog->MaxUniformComponents = 4 * MAX_UNIFORMS; prog->MaxInputComponents = 16 * 4; /* old limit not to break tnl and swrast */ From c10dc485f395d3b8d616bf2857bcdef9712dc47b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 9 Jun 2015 09:14:17 -0600 Subject: [PATCH 525/834] glsl: fix comment typo: s/accpet/accept/ --- src/glsl/ir_hierarchical_visitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/ir_hierarchical_visitor.h b/src/glsl/ir_hierarchical_visitor.h index faa52fd79c0..cac78698e73 100644 --- a/src/glsl/ir_hierarchical_visitor.h +++ b/src/glsl/ir_hierarchical_visitor.h @@ -59,7 +59,7 @@ enum ir_visitor_status { * in the composite's \c accept method. The \c accept method for a leaf-node * class will simply call the \c visit method, as usual, and pass its return * value on. The \c accept method for internal-node classes will call the \c - * visit_enter method, call the \c accpet method of each child node, and, + * visit_enter method, call the \c accept method of each child node, and, * finally, call the \c visit_leave method. If any of these return a value * other that \c visit_continue, the correct action must be taken. * From 30ba4faf5dcb9f55352eed1b37a3e820e8efe2ad Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 8 Jun 2015 14:52:07 -0700 Subject: [PATCH 526/834] i965/gen9: Use raw PS invocation count for queries Previously the number needed to be divided by 4 to get the proper results. Now the hardware does the right thing. Through experimentation it seems Braswell (CHV) does also need the division by 4. 
Fixes piglit test: arb_pipeline_statistics_query-frag Signed-off-by: Ben Widawsky Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/gen6_queryobj.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c index 6431ed56d81..ba5c944fb3d 100644 --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c @@ -246,7 +246,7 @@ gen6_queryobj_get_results(struct gl_context *ctx, * and correctly emitted the number of pixel shader invocations, but, * whomever forgot to undo the multiply by 4. */ - if (brw->gen >= 8 || brw->is_haswell) + if (brw->gen == 8 || brw->is_haswell) query->Base.Result /= 4; break; From b8213bbe4cec5bab89e07aab8d225e617d4a2087 Mon Sep 17 00:00:00 2001 From: Chih-Wei Huang Date: Wed, 20 May 2015 11:25:25 +0800 Subject: [PATCH 527/834] android: loader: export the path to be included Signed-off-by: Chih-Wei Huang Reviewed-by: Eric Anholt --- src/egl/drivers/dri2/Android.mk | 1 - src/loader/Android.mk | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/Android.mk b/src/egl/drivers/dri2/Android.mk index 5931ce8f2f0..d4d809bc3de 100644 --- a/src/egl/drivers/dri2/Android.mk +++ b/src/egl/drivers/dri2/Android.mk @@ -45,7 +45,6 @@ endif LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/egl/main \ - $(MESA_TOP)/src/loader \ $(DRM_GRALLOC_TOP) LOCAL_STATIC_LIBRARIES := \ diff --git a/src/loader/Android.mk b/src/loader/Android.mk index 8e215de3cda..92d9fd20d3c 100644 --- a/src/loader/Android.mk +++ b/src/loader/Android.mk @@ -40,6 +40,8 @@ else LOCAL_SHARED_LIBRARIES := libdrm endif +LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH) + LOCAL_MODULE := libmesa_loader include $(MESA_COMMON_MK) From 581aa208fa8fc653dce50d95c1f3400bb0c68ab2 Mon Sep 17 00:00:00 2001 From: Chih-Wei Huang Date: Wed, 20 May 2015 11:25:26 +0800 Subject: [PATCH 528/834] android: export more dirs from 
libmesa_dri_common The include paths of libmesa_dri_common are also used by modules that need libmesa_dri_common. Signed-off-by: Chih-Wei Huang Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/common/Android.mk | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/common/Android.mk b/src/mesa/drivers/dri/common/Android.mk index a7fcd6d572a..c003c942fca 100644 --- a/src/mesa/drivers/dri/common/Android.mk +++ b/src/mesa/drivers/dri/common/Android.mk @@ -39,7 +39,9 @@ intermediates := $(call local-generated-sources-dir) LOCAL_C_INCLUDES := \ $(MESA_DRI_C_INCLUDES) -LOCAL_EXPORT_C_INCLUDE_DIRS := $(intermediates) +LOCAL_EXPORT_C_INCLUDE_DIRS := \ + $(LOCAL_PATH) \ + $(intermediates) # swrast only ifeq ($(MESA_GPU_DRIVERS),swrast) From f4f609b27e4fbefb52b84b617051fb4cdba45c8f Mon Sep 17 00:00:00 2001 From: Chih-Wei Huang Date: Wed, 20 May 2015 11:25:27 +0800 Subject: [PATCH 529/834] android: add rules to build gallium/state_trackers/dri Signed-off-by: Chih-Wei Huang Reviewed-by: Eric Anholt --- src/gallium/Android.mk | 5 +- src/gallium/state_trackers/dri/Android.mk | 64 +++++++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 src/gallium/state_trackers/dri/Android.mk diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk index b2662ffca8c..aaa07bcf6ff 100644 --- a/src/gallium/Android.mk +++ b/src/gallium/Android.mk @@ -79,5 +79,6 @@ ifneq ($(filter vmwgfx, $(MESA_GPU_DRIVERS)),) SUBDIRS += winsys/svga/drm drivers/svga endif -mkfiles := $(patsubst %,$(GALLIUM_TOP)/%/Android.mk,$(SUBDIRS)) -include $(mkfiles) +SUBDIRS += state_trackers/dri + +include $(call all-named-subdir-makefiles,$(SUBDIRS)) diff --git a/src/gallium/state_trackers/dri/Android.mk b/src/gallium/state_trackers/dri/Android.mk new file mode 100644 index 00000000000..188e4a1404d --- /dev/null +++ b/src/gallium/state_trackers/dri/Android.mk @@ -0,0 +1,64 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2015 Chih-Wei Huang +# 
Copyright (C) 2015 Android-x86 Open Source Project +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(common_SOURCES) + +LOCAL_CFLAGS := \ + -DGALLIUM_STATIC_TARGETS=1 \ + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa \ + +LOCAL_EXPORT_C_INCLUDE_DIRS := \ + $(LOCAL_PATH) \ + $(LOCAL_C_INCLUDES) \ + +LOCAL_STATIC_LIBRARIES := \ + libmesa_dri_common \ + +ifneq ($(filter swrast,$(MESA_GPU_DRIVERS)),) +LOCAL_CFLAGS += -DGALLIUM_SOFTPIPE +LOCAL_SRC_FILES += $(drisw_SOURCES) +endif + +# swrast only? 
+ifeq ($(MESA_GPU_DRIVERS),swrast) +LOCAL_CFLAGS += -D__NOT_HAVE_DRM_H +else +LOCAL_SRC_FILES += $(dri2_SOURCES) +LOCAL_SHARED_LIBRARIES := libdrm +endif + +LOCAL_MODULE := libmesa_st_dri + +LOCAL_GENERATED_SOURCES := $(MESA_DRI_OPTIONS_H) + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) From 933df3d3350867282d7334c94abf1ec677d78029 Mon Sep 17 00:00:00 2001 From: Chih-Wei Huang Date: Wed, 20 May 2015 11:25:28 +0800 Subject: [PATCH 530/834] android: add rules to build a gallium_dri.so This single .so includes all of the enabled gallium drivers. Signed-off-by: Chih-Wei Huang Reviewed-by: Eric Anholt --- src/gallium/Android.mk | 7 +- src/gallium/targets/dri/Android.mk | 112 +++++++++++++++++++++++ src/gallium/winsys/sw/dri/Android.mk | 35 +++++++ src/gallium/winsys/sw/kms-dri/Android.mk | 37 ++++++++ 4 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 src/gallium/targets/dri/Android.mk create mode 100644 src/gallium/winsys/sw/dri/Android.mk create mode 100644 src/gallium/winsys/sw/kms-dri/Android.mk diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk index aaa07bcf6ff..a9c34d9146c 100644 --- a/src/gallium/Android.mk +++ b/src/gallium/Android.mk @@ -33,7 +33,9 @@ SUBDIRS := auxiliary # # swrast -SUBDIRS += winsys/sw/android drivers/softpipe +ifneq ($(filter swrast,$(MESA_GPU_DRIVERS)),) +SUBDIRS += winsys/sw/dri winsys/sw/kms-dri drivers/softpipe +endif # freedreno ifneq ($(filter freedreno, $(MESA_GPU_DRIVERS)),) @@ -79,6 +81,7 @@ ifneq ($(filter vmwgfx, $(MESA_GPU_DRIVERS)),) SUBDIRS += winsys/svga/drm drivers/svga endif -SUBDIRS += state_trackers/dri +# Gallium state trackers and target for dri +SUBDIRS += state_trackers/dri targets/dri include $(call all-named-subdir-makefiles,$(SUBDIRS)) diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk new file mode 100644 index 00000000000..ac33a6ed32f --- /dev/null +++ b/src/gallium/targets/dri/Android.mk @@ -0,0 +1,112 @@ +# Mesa 3-D graphics library 
+# +# Copyright (C) 2015 Chih-Wei Huang +# Copyright (C) 2015 Android-x86 Open Source Project +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE := gallium_dri + +ifeq ($(MESA_LOLLIPOP_BUILD),true) +LOCAL_MODULE_RELATIVE_PATH := $(notdir $(MESA_DRI_MODULE_PATH)) +else +LOCAL_MODULE_PATH := $(MESA_DRI_MODULE_PATH) +endif + +LOCAL_SRC_FILES := target.c + +LOCAL_CFLAGS := -DDRI_TARGET -DHAVE_LIBDRM + +LOCAL_SHARED_LIBRARIES := \ + libdl \ + libglapi \ + libexpat \ + +# swrast only? 
+ifeq ($(MESA_GPU_DRIVERS),swrast) +LOCAL_CFLAGS += -D__NOT_HAVE_DRM_H +else +LOCAL_SHARED_LIBRARIES += libdrm +endif + +ifneq ($(filter freedreno,$(MESA_GPU_DRIVERS)),) +LOCAL_CFLAGS += -DGALLIUM_FREEDRENO +gallium_DRIVERS += libmesa_winsys_freedreno libmesa_pipe_freedreno +LOCAL_SHARED_LIBRARIES += libdrm_freedreno +endif +ifneq ($(filter i915g,$(MESA_GPU_DRIVERS)),) +gallium_DRIVERS += libmesa_winsys_i915 libmesa_pipe_i915 +LOCAL_SHARED_LIBRARIES += libdrm_intel +LOCAL_CFLAGS += -DGALLIUM_I915 +endif +ifneq ($(filter ilo,$(MESA_GPU_DRIVERS)),) +gallium_DRIVERS += libmesa_winsys_intel libmesa_pipe_ilo +LOCAL_SHARED_LIBRARIES += libdrm_intel +LOCAL_CFLAGS += -DGALLIUM_ILO +endif +ifneq ($(filter nouveau,$(MESA_GPU_DRIVERS)),) +gallium_DRIVERS += libmesa_winsys_nouveau libmesa_pipe_nouveau +LOCAL_CFLAGS += -DGALLIUM_NOUVEAU +LOCAL_SHARED_LIBRARIES += libdrm_nouveau +endif +ifneq ($(filter r%,$(MESA_GPU_DRIVERS)),) +ifneq ($(filter r300g,$(MESA_GPU_DRIVERS)),) +gallium_DRIVERS += libmesa_pipe_r300 +LOCAL_CFLAGS += -DGALLIUM_R300 +endif +ifneq ($(filter r600g,$(MESA_GPU_DRIVERS)),) +gallium_DRIVERS += libmesa_pipe_r600 +LOCAL_CFLAGS += -DGALLIUM_R600 +endif +ifneq ($(filter radeonsi,$(MESA_GPU_DRIVERS)),) +gallium_DRIVERS += libmesa_pipe_radeonsi +LOCAL_CFLAGS += -DGALLIUM_RADEONSI +endif +gallium_DRIVERS += libmesa_winsys_radeon libmesa_pipe_radeon +LOCAL_SHARED_LIBRARIES += libdrm_radeon +endif +ifneq ($(filter swrast,$(MESA_GPU_DRIVERS)),) +gallium_DRIVERS += libmesa_pipe_softpipe libmesa_winsys_sw_dri libmesa_winsys_sw_kms_dri +LOCAL_CFLAGS += -DGALLIUM_SOFTPIPE +endif +ifneq ($(filter vmwgfx,$(MESA_GPU_DRIVERS)),) +gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga +LOCAL_CFLAGS += -DGALLIUM_VMWGFX +endif +ifneq ($(filter nouveau r600g,$(MESA_GPU_DRIVERS)),) +LOCAL_SHARED_LIBRARIES += libstlport +endif + +LOCAL_STATIC_LIBRARIES := \ + $(gallium_DRIVERS) \ + libmesa_st_dri \ + libmesa_st_mesa \ + libmesa_glsl \ + libmesa_dri_common \ + 
libmesa_megadriver_stub \ + libmesa_gallium \ + libmesa_util \ + libmesa_loader \ + +include $(GALLIUM_COMMON_MK) +include $(BUILD_SHARED_LIBRARY) diff --git a/src/gallium/winsys/sw/dri/Android.mk b/src/gallium/winsys/sw/dri/Android.mk new file mode 100644 index 00000000000..72fb920ff15 --- /dev/null +++ b/src/gallium/winsys/sw/dri/Android.mk @@ -0,0 +1,35 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2015 Chih-Wei Huang +# Copyright (C) 2015 Android-x86 Open Source Project +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_MODULE := libmesa_winsys_sw_dri + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/winsys/sw/kms-dri/Android.mk b/src/gallium/winsys/sw/kms-dri/Android.mk new file mode 100644 index 00000000000..b065242aaf3 --- /dev/null +++ b/src/gallium/winsys/sw/kms-dri/Android.mk @@ -0,0 +1,37 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2015 Chih-Wei Huang +# Copyright (C) 2015 Android-x86 Open Source Project +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_MODULE := libmesa_winsys_sw_kms_dri + +LOCAL_SHARED_LIBRARIES := libdrm + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) From ac296aee58158ccf1953e812a04f99eb5f8eb57b Mon Sep 17 00:00:00 2001 From: Chih-Wei Huang Date: Wed, 20 May 2015 11:25:29 +0800 Subject: [PATCH 531/834] android: Depend on gallium_dri from EGL, instead of linking in gallium. The Android gallium build used to use gallium_egl, which was removed back in March. Instead, we will now use a normal Mesa libEGL loader with dlopen()ing of a DRI module. v2: add a clean step to rebuild all dri modules properly. v3: Squish the 2 patches doing this together (change by anholt). Signed-off-by: Chih-Wei Huang Reviewed-by: Eric Anholt --- Android.mk | 6 +-- CleanSpec.mk | 1 + src/egl/main/Android.mk | 83 ++--------------------------------------- 3 files changed, 5 insertions(+), 85 deletions(-) diff --git a/Android.mk b/Android.mk index b19419ba7c2..6a09a9db177 100644 --- a/Android.mk +++ b/Android.mk @@ -89,13 +89,9 @@ SUBDIRS := \ src/glsl \ src/mesa \ src/util \ - src/egl/main - -ifeq ($(strip $(MESA_BUILD_CLASSIC)),true) -SUBDIRS += \ + src/egl/main \ src/egl/drivers/dri2 \ src/mesa/drivers/dri -endif ifeq ($(strip $(MESA_BUILD_GALLIUM)),true) SUBDIRS += src/gallium diff --git a/CleanSpec.mk b/CleanSpec.mk index 20681638e21..d08b0def7d0 100644 --- a/CleanSpec.mk +++ b/CleanSpec.mk @@ -13,3 +13,4 @@ $(call add-clean-step, rm -rf $(PRODUCT_OUT)/*/SHARED_LIBRARIES/libGLES_mesa_int $(call add-clean-step, rm -rf $(HOST_OUT_release)/*/EXECUTABLES/mesa_*_intermediates) $(call add-clean-step, rm -rf $(HOST_OUT_release)/*/EXECUTABLES/glsl_compiler_intermediates) $(call add-clean-step, rm -rf $(HOST_OUT_release)/*/STATIC_LIBRARIES/libmesa_*_intermediates) +$(call add-clean-step, rm -rf $(PRODUCT_OUT)/*/SHARED_LIBRARIES/*_dri_intermediates) diff --git 
a/src/egl/main/Android.mk b/src/egl/main/Android.mk index 12b66d053fc..8f687e9f255 100644 --- a/src/egl/main/Android.mk +++ b/src/egl/main/Android.mk @@ -43,8 +43,6 @@ LOCAL_CFLAGS := \ -D_EGL_DRIVER_SEARCH_DIR=\"/system/lib/egl\" \ -D_EGL_OS_UNIX=1 -LOCAL_STATIC_LIBRARIES := - LOCAL_SHARED_LIBRARIES := \ libglapi \ libdl \ @@ -62,95 +60,20 @@ ifneq ($(MESA_GPU_DRIVERS),swrast) LOCAL_SHARED_LIBRARIES += libdrm endif -ifeq ($(strip $(MESA_BUILD_CLASSIC)),true) LOCAL_CFLAGS += -D_EGL_BUILT_IN_DRIVER_DRI2 -LOCAL_STATIC_LIBRARIES += libmesa_egl_dri2 +ifeq ($(strip $(MESA_BUILD_CLASSIC)),true) # require i915_dri and/or i965_dri LOCAL_REQUIRED_MODULES += \ $(addsuffix _dri, $(filter i915 i965, $(MESA_GPU_DRIVERS))) endif # MESA_BUILD_CLASSIC ifeq ($(strip $(MESA_BUILD_GALLIUM)),true) - -gallium_DRIVERS := - -# swrast -gallium_DRIVERS += libmesa_pipe_softpipe libmesa_winsys_sw_android - -# freedreno -ifneq ($(filter freedreno, $(MESA_GPU_DRIVERS)),) -gallium_DRIVERS += libmesa_winsys_freedreno libmesa_pipe_freedreno -LOCAL_SHARED_LIBRARIES += libdrm_freedreno -endif - -# i915g -ifneq ($(filter i915g, $(MESA_GPU_DRIVERS)),) -gallium_DRIVERS += libmesa_winsys_i915 libmesa_pipe_i915 -LOCAL_SHARED_LIBRARIES += libdrm_intel -endif - -# ilo -ifneq ($(filter ilo, $(MESA_GPU_DRIVERS)),) -gallium_DRIVERS += libmesa_winsys_intel libmesa_pipe_ilo -LOCAL_SHARED_LIBRARIES += libdrm_intel -endif - -# nouveau -ifneq ($(filter nouveau, $(MESA_GPU_DRIVERS)),) -gallium_DRIVERS += libmesa_winsys_nouveau libmesa_pipe_nouveau -LOCAL_SHARED_LIBRARIES += libdrm_nouveau -LOCAL_SHARED_LIBRARIES += libstlport -endif - -# r300g/r600g/radeonsi -ifneq ($(filter r300g r600g radeonsi, $(MESA_GPU_DRIVERS)),) -gallium_DRIVERS += libmesa_winsys_radeon -LOCAL_SHARED_LIBRARIES += libdrm_radeon -ifneq ($(filter r300g, $(MESA_GPU_DRIVERS)),) -gallium_DRIVERS += libmesa_pipe_r300 -endif # r300g -ifneq ($(filter r600g radeonsi, $(MESA_GPU_DRIVERS)),) -ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),) 
-gallium_DRIVERS += libmesa_pipe_r600 -LOCAL_SHARED_LIBRARIES += libstlport -endif # r600g -ifneq ($(filter radeonsi, $(MESA_GPU_DRIVERS)),) -gallium_DRIVERS += libmesa_pipe_radeonsi -endif # radeonsi -gallium_DRIVERS += libmesa_pipe_radeon -endif # r600g || radeonsi -endif # r300g || r600g || radeonsi - -# vmwgfx -ifneq ($(filter vmwgfx, $(MESA_GPU_DRIVERS)),) -gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga -endif - -# -# Notes about the order here: -# -# * libmesa_st_egl depends on libmesa_winsys_sw_android in $(gallium_DRIVERS) -# * libmesa_pipe_r300 in $(gallium_DRIVERS) depends on libmesa_st_mesa and -# libmesa_glsl -# * libmesa_st_mesa depends on libmesa_glsl -# * libmesa_glsl depends on libmesa_glsl_utils -# -LOCAL_STATIC_LIBRARIES := \ - libmesa_egl_gallium \ - libmesa_st_egl \ - $(gallium_DRIVERS) \ - libmesa_st_mesa \ - libmesa_util \ - libmesa_glsl \ - libmesa_glsl_utils \ - libmesa_gallium \ - $(LOCAL_STATIC_LIBRARIES) - +LOCAL_REQUIRED_MODULES += gallium_dri endif # MESA_BUILD_GALLIUM LOCAL_STATIC_LIBRARIES := \ - $(LOCAL_STATIC_LIBRARIES) \ + libmesa_egl_dri2 \ libmesa_loader LOCAL_MODULE := libGLES_mesa From c3b5afbd4e682f76e16ea85883af571165bd24ee Mon Sep 17 00:00:00 2001 From: Chih-Wei Huang Date: Wed, 20 May 2015 11:25:30 +0800 Subject: [PATCH 532/834] android: try to load gallium_dri.so directly This avoids needing hardlinks between all of the DRI driver .so names, since we're the only loader on the system. v2: Add early exit on success (like previous block) and log message on failure. 
Signed-off-by: Chih-Wei Huang Reviewed-by: Eric Anholt --- src/egl/drivers/dri2/egl_dri2.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 82f8843e001..44a6c96ae9a 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -419,6 +419,15 @@ dri2_open_driver(_EGLDisplay *disp) /* not need continue to loop all paths once the driver is found */ if (dri2_dpy->driver != NULL) break; + +#ifdef ANDROID + snprintf(path, sizeof path, "%.*s/gallium_dri.so", len, p); + dri2_dpy->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL); + if (dri2_dpy->driver == NULL) + _eglLog(_EGL_DEBUG, "failed to open %s: %s\n", path, dlerror()); + else + break; +#endif } if (dri2_dpy->driver == NULL) { From 1e4081f54aa5c6cba566ed549389d847bf7e6799 Mon Sep 17 00:00:00 2001 From: Chih-Wei Huang Date: Wed, 20 May 2015 11:25:33 +0800 Subject: [PATCH 533/834] android: generate files by $(call es-gen) Use the pre-defined macro es-gen to generate newly added files instead of writing new rules manually. 
The handmade rules that may generate the files before the directory is created result in such an error: /bin/bash: out/target/product/x86/gen/STATIC_LIBRARIES/libmesa_st_mesa_intermediates/main/format_pack.c: No such file or directory make: *** [out/target/product/x86/gen/STATIC_LIBRARIES/libmesa_st_mesa_intermediates/main/format_pack.c] Error 1 Signed-off-by: Chih-Wei Huang Reviewed-by: Eric Anholt --- src/mesa/Android.gen.mk | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/mesa/Android.gen.mk b/src/mesa/Android.gen.mk index cc979547e0a..145f2594cda 100644 --- a/src/mesa/Android.gen.mk +++ b/src/mesa/Android.gen.mk @@ -115,9 +115,11 @@ $(intermediates)/main/api_exec.c: $(dispatch_deps) GET_HASH_GEN := $(LOCAL_PATH)/main/get_hash_generator.py +$(intermediates)/main/get_hash.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(GET_HASH_GEN) +$(intermediates)/main/get_hash.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml $(intermediates)/main/get_hash.h: $(glapi)/gl_and_es_API.xml \ $(LOCAL_PATH)/main/get_hash_params.py $(GET_HASH_GEN) - @$(MESA_PYTHON2) $(GET_HASH_GEN) -f $< > $@ + $(call es-gen) FORMAT_INFO := $(LOCAL_PATH)/main/format_info.py format_info_deps := \ @@ -125,8 +127,10 @@ format_info_deps := \ $(LOCAL_PATH)/main/format_parser.py \ $(FORMAT_INFO) +$(intermediates)/main/format_info.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(FORMAT_INFO) +$(intermediates)/main/format_info.h: PRIVATE_XML := $(intermediates)/main/format_info.h: $(format_info_deps) - @$(MESA_PYTHON2) $(FORMAT_INFO) $< > $@ + $(call es-gen, $<) FORMAT_PACK := $(LOCAL_PATH)/main/format_pack.py format_pack_deps := \ @@ -134,8 +138,10 @@ format_pack_deps := \ $(LOCAL_PATH)/main/format_parser.py \ $(FORMAT_PACK) +$(intermediates)/main/format_pack.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(FORMAT_PACK) +$(intermediates)/main/format_pack.c: PRIVATE_XML := $(intermediates)/main/format_pack.c: $(format_pack_deps) - $(hide) $(MESA_PYTHON2) $(FORMAT_PACK) $< > $@ + $(call es-gen, $<) 
FORMAT_UNPACK := $(LOCAL_PATH)/main/format_unpack.py format_unpack_deps := \ @@ -143,5 +149,7 @@ format_unpack_deps := \ $(LOCAL_PATH)/main/format_parser.py \ $(FORMAT_UNPACK) +$(intermediates)/main/format_unpack.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(FORMAT_UNPACK) +$(intermediates)/main/format_unpack.c: PRIVATE_XML := $(intermediates)/main/format_unpack.c: $(format_unpack_deps) - $(hide) $(MESA_PYTHON2) $(FORMAT_UNPACK) $< > $@ + $(call es-gen, $<) From 1842832660c4eade037caa760110b58a2d7f055b Mon Sep 17 00:00:00 2001 From: Chih-Wei Huang Date: Wed, 20 May 2015 11:25:34 +0800 Subject: [PATCH 534/834] android: enable the radeonsi driver Based on the nice work of Paulo Sergio Travaglia . The main modifications are: - Include paths for LLVM header files and shared/static libraries - Set C++ flag "c++11" to avoid compiling errors on LLVM header files - Set defines for LLVM - Add GALLIVM source files - Changes path of libelf library for lollipop Signed-off-by: Chih-Wei Huang Acked-by: Eric Anholt --- Android.common.mk | 8 ++++++++ Android.mk | 2 ++ src/gallium/Android.common.mk | 8 ++++++++ src/gallium/auxiliary/Android.mk | 8 ++++++++ src/gallium/drivers/radeon/Android.mk | 4 ++++ src/gallium/targets/dri/Android.mk | 10 ++++++++++ 6 files changed, 40 insertions(+) diff --git a/Android.common.mk b/Android.common.mk index edf52d6fabb..43766bf8c57 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -68,6 +68,14 @@ LOCAL_CFLAGS += \ endif endif +ifeq ($(MESA_ENABLE_LLVM),true) +LOCAL_CFLAGS += \ + -DHAVE_LLVM=0x0305 -DLLVM_VERSION_PATCH=2 \ + -D__STDC_CONSTANT_MACROS \ + -D__STDC_FORMAT_MACROS \ + -D__STDC_LIMIT_MACROS +endif + LOCAL_CPPFLAGS += \ -Wno-error=non-virtual-dtor \ -Wno-non-virtual-dtor diff --git a/Android.mk b/Android.mk index 6a09a9db177..341978a68c6 100644 --- a/Android.mk +++ b/Android.mk @@ -80,6 +80,8 @@ else MESA_BUILD_GALLIUM := false endif +MESA_ENABLE_LLVM := $(if $(filter radeonsi,$(MESA_GPU_DRIVERS)),true,false) + # add subdirectories ifneq 
($(strip $(MESA_GPU_DRIVERS)),) diff --git a/src/gallium/Android.common.mk b/src/gallium/Android.common.mk index 782510ff0f4..7c6c7ac6820 100644 --- a/src/gallium/Android.common.mk +++ b/src/gallium/Android.common.mk @@ -29,4 +29,12 @@ LOCAL_C_INCLUDES += \ $(GALLIUM_TOP)/winsys \ $(GALLIUM_TOP)/drivers +ifeq ($(MESA_ENABLE_LLVM),true) +LOCAL_C_INCLUDES += \ + external/llvm/include \ + external/llvm/device/include \ + external/libcxx/include \ + external/elfutils/$(if $(filter true,$(MESA_LOLLIPOP_BUILD)),0.153/)libelf +endif + include $(MESA_COMMON_MK) diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk index 96a2125defb..2d91752595c 100644 --- a/src/gallium/auxiliary/Android.mk +++ b/src/gallium/auxiliary/Android.mk @@ -35,6 +35,14 @@ LOCAL_SRC_FILES := \ LOCAL_C_INCLUDES := \ $(GALLIUM_TOP)/auxiliary/util +ifeq ($(MESA_ENABLE_LLVM),true) +LOCAL_SRC_FILES += \ + $(GALLIVM_SOURCES) \ + $(GALLIVM_CPP_SOURCES) + +LOCAL_CPPFLAGS := -std=c++11 +endif + LOCAL_MODULE := libmesa_gallium # generate sources diff --git a/src/gallium/drivers/radeon/Android.mk b/src/gallium/drivers/radeon/Android.mk index d61579280ea..6997a6d3ec3 100644 --- a/src/gallium/drivers/radeon/Android.mk +++ b/src/gallium/drivers/radeon/Android.mk @@ -30,6 +30,10 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := $(C_SOURCES) +ifeq ($(MESA_ENABLE_LLVM),true) +LOCAL_SRC_FILES += $(LLVM_C_FILES) +endif + LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon LOCAL_MODULE := libmesa_pipe_radeon diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index ac33a6ed32f..78f7b7c61f9 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -80,6 +80,7 @@ LOCAL_CFLAGS += -DGALLIUM_R600 endif ifneq ($(filter radeonsi,$(MESA_GPU_DRIVERS)),) gallium_DRIVERS += libmesa_pipe_radeonsi +LOCAL_SHARED_LIBRARIES += libLLVM LOCAL_CFLAGS += -DGALLIUM_RADEONSI endif gallium_DRIVERS += libmesa_winsys_radeon libmesa_pipe_radeon @@ -108,5 +109,14 
@@ LOCAL_STATIC_LIBRARIES := \ libmesa_util \ libmesa_loader \ +ifeq ($(MESA_ENABLE_LLVM),true) +LOCAL_STATIC_LIBRARIES += \ + libLLVMR600CodeGen \ + libLLVMR600Desc \ + libLLVMR600Info \ + libLLVMR600AsmPrinter \ + libelf +endif + include $(GALLIUM_COMMON_MK) include $(BUILD_SHARED_LIBRARY) From c5e11e5f7f67fe5a1d28b1446f87af7aa3ba68d8 Mon Sep 17 00:00:00 2001 From: Chih-Wei Huang Date: Wed, 20 May 2015 11:25:39 +0800 Subject: [PATCH 535/834] android: build with libcxx on android lollipop On Lollipop, apparently stlport is gone and libcxx must be used instead. We still support stlport when building on earlier android releases. Signed-off-by: Chih-Wei Huang Reviewed-by: Eric Anholt --- Android.common.mk | 1 + src/gallium/drivers/nouveau/Android.mk | 4 ++++ src/gallium/drivers/r600/Android.mk | 4 ++++ src/gallium/targets/dri/Android.mk | 3 ++- src/glsl/Android.mk | 1 - 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Android.common.mk b/Android.common.mk index 43766bf8c57..d662d6018e4 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -77,6 +77,7 @@ LOCAL_CFLAGS += \ endif LOCAL_CPPFLAGS += \ + $(if $(filter true,$(MESA_LOLLIPOP_BUILD)),-D_USING_LIBCXX) \ -Wno-error=non-virtual-dtor \ -Wno-non-virtual-dtor diff --git a/src/gallium/drivers/nouveau/Android.mk b/src/gallium/drivers/nouveau/Android.mk index 420c8e5734c..daf3abd1bb3 100644 --- a/src/gallium/drivers/nouveau/Android.mk +++ b/src/gallium/drivers/nouveau/Android.mk @@ -39,6 +39,10 @@ LOCAL_SRC_FILES := \ LOCAL_SHARED_LIBRARIES := libdrm libdrm_nouveau LOCAL_MODULE := libmesa_pipe_nouveau +ifeq ($(MESA_LOLLIPOP_BUILD),true) +LOCAL_C_INCLUDES := external/libcxx/include +else include external/stlport/libstlport.mk +endif include $(GALLIUM_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/drivers/r600/Android.mk b/src/gallium/drivers/r600/Android.mk index e9357597a9b..bfe39873089 100644 --- a/src/gallium/drivers/r600/Android.mk +++ b/src/gallium/drivers/r600/Android.mk @@ 
-33,6 +33,10 @@ LOCAL_SRC_FILES := $(C_SOURCES) $(CXX_SOURCES) LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon LOCAL_MODULE := libmesa_pipe_r600 +ifeq ($(MESA_LOLLIPOP_BUILD),true) +LOCAL_C_INCLUDES := external/libcxx/include +else include external/stlport/libstlport.mk +endif include $(GALLIUM_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index 78f7b7c61f9..1772d250b4b 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -95,7 +95,7 @@ gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga LOCAL_CFLAGS += -DGALLIUM_VMWGFX endif ifneq ($(filter nouveau r600g,$(MESA_GPU_DRIVERS)),) -LOCAL_SHARED_LIBRARIES += libstlport +LOCAL_SHARED_LIBRARIES += $(if $(filter true,$(MESA_LOLLIPOP_BUILD)),libc++,libstlport) endif LOCAL_STATIC_LIBRARIES := \ @@ -116,6 +116,7 @@ LOCAL_STATIC_LIBRARIES += \ libLLVMR600Info \ libLLVMR600AsmPrinter \ libelf +LOCAL_LDLIBS += $(if $(filter true,$(MESA_LOLLIPOP_BUILD)),-lgcc) endif include $(GALLIUM_COMMON_MK) diff --git a/src/glsl/Android.mk b/src/glsl/Android.mk index f20741e0d0f..f63b7daf26e 100644 --- a/src/glsl/Android.mk +++ b/src/glsl/Android.mk @@ -46,7 +46,6 @@ LOCAL_C_INCLUDES := \ LOCAL_MODULE := libmesa_glsl -include external/stlport/libstlport.mk include $(LOCAL_PATH)/Android.gen.mk include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) From e67b12eaf89acc9c446de77b77120a2f6cdbbe12 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 1 Jun 2015 12:50:49 -0700 Subject: [PATCH 536/834] vc4: Update to current kernel validation code. After profiling on real hardware, I found a few ways to cut down the kernel overhead. 
--- src/gallium/drivers/vc4/kernel/vc4_drv.h | 3 +- src/gallium/drivers/vc4/kernel/vc4_validate.c | 68 +++++++++---------- .../drivers/vc4/vc4_simulator_validate.h | 1 + 3 files changed, 37 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h index 325f944bf25..dede7162c42 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_drv.h +++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h @@ -89,7 +89,8 @@ struct vc4_exec_info { bool found_wait_on_semaphore_packet; uint8_t bin_tiles_x, bin_tiles_y; uint32_t fb_width, fb_height; - uint32_t tile_alloc_init_block_size; + uint32_t tile_alloc_init_block_mask; + uint32_t tile_alloc_init_block_last; struct drm_gem_cma_object *tile_alloc_bo; /** diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 2d04a4a7b9a..2b57ca0b4b0 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -156,24 +156,30 @@ check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, uint32_t utile_w = utile_width(cpp); uint32_t utile_h = utile_height(cpp); - /* The values are limited by the packet/texture parameter bitfields, - * so we don't need to worry as much about integer overflow. + /* The shaded vertex format stores signed 12.4 fixed point + * (-2048,2047) offsets from the viewport center, so we should + * never have a render target larger than 4096. The texture + * unit can only sample from 2048x2048, so it's even more + * restricted. This lets us avoid worrying about overflow in + * our math. 
*/ - BUG_ON(width > 65535); - BUG_ON(height > 65535); + if (width > 4096 || height > 4096) { + DRM_ERROR("Surface dimesions (%d,%d) too large", width, height); + return false; + } switch (tiling_format) { case VC4_TILING_FORMAT_LINEAR: - aligned_width = roundup(width, utile_w); + aligned_width = round_up(width, utile_w); aligned_height = height; break; case VC4_TILING_FORMAT_T: - aligned_width = roundup(width, utile_w * 8); - aligned_height = roundup(height, utile_h * 8); + aligned_width = round_up(width, utile_w * 8); + aligned_height = round_up(height, utile_h * 8); break; case VC4_TILING_FORMAT_LT: - aligned_width = roundup(width, utile_w); - aligned_height = roundup(height, utile_h); + aligned_width = round_up(width, utile_w); + aligned_height = round_up(height, utile_h); break; default: DRM_ERROR("buffer tiling %d unsupported\n", tiling_format); @@ -181,13 +187,6 @@ check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, } stride = aligned_width * cpp; - - if (INT_MAX / stride < aligned_height) { - DRM_ERROR("Overflow in fbo size (%dx%d -> %dx%d)\n", - width, height, - aligned_width, aligned_height); - return false; - } size = stride * aligned_height; if (size + offset < size || @@ -269,14 +268,11 @@ validate_wait_on_semaphore(VALIDATE_ARGS) static int validate_branch_to_sublist(VALIDATE_ARGS) { - struct drm_gem_cma_object *target; uint32_t offset; - if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &target)) - return -EINVAL; - - if (target != exec->tile_alloc_bo) { - DRM_ERROR("Jumping to BOs other than tile alloc unsupported\n"); + if (!exec->tile_alloc_bo) { + DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST seen before " + "binner setup\n"); return -EINVAL; } @@ -286,15 +282,14 @@ validate_branch_to_sublist(VALIDATE_ARGS) } offset = *(uint32_t *)(untrusted + 0); - if (offset % exec->tile_alloc_init_block_size || - offset / exec->tile_alloc_init_block_size >= - exec->bin_tiles_x * exec->bin_tiles_y) { + if (offset & 
exec->tile_alloc_init_block_mask || + offset > exec->tile_alloc_init_block_last) { DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST must jump to initial " "tile allocation space.\n"); return -EINVAL; } - *(uint32_t *)(validated + 0) = target->paddr + offset; + *(uint32_t *)(validated + 0) = exec->tile_alloc_bo->paddr + offset; return 0; } @@ -496,6 +491,7 @@ validate_tile_binning_config(VALIDATE_ARGS) struct drm_gem_cma_object *tile_state_data_array; uint8_t flags; uint32_t tile_allocation_size; + uint32_t tile_alloc_init_block_size; if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &tile_allocation) || !vc4_use_handle(exec, 1, VC4_MODE_TSDA, &tile_state_data_array)) @@ -547,15 +543,19 @@ validate_tile_binning_config(VALIDATE_ARGS) *(uint32_t *)validated = tile_allocation->paddr; exec->tile_alloc_bo = tile_allocation; - exec->tile_alloc_init_block_size = 1 << (5 + ((flags >> 5) & 3)); + tile_alloc_init_block_size = 1 << (5 + ((flags >> 5) & 3)); if (exec->bin_tiles_x * exec->bin_tiles_y * - exec->tile_alloc_init_block_size > tile_allocation_size) { + tile_alloc_init_block_size > tile_allocation_size) { DRM_ERROR("tile init exceeds tile alloc size (%d vs %d)\n", exec->bin_tiles_x * exec->bin_tiles_y * - exec->tile_alloc_init_block_size, + tile_alloc_init_block_size, tile_allocation_size); return -EINVAL; } + exec->tile_alloc_init_block_mask = tile_alloc_init_block_size - 1; + exec->tile_alloc_init_block_last = tile_alloc_init_block_size * + (exec->bin_tiles_x * exec->bin_tiles_y - 1); + if (*(uint32_t *)(untrusted + 8) != 0) { DRM_ERROR("TSDA offset != 0 unsupported\n"); return -EINVAL; @@ -927,15 +927,15 @@ reloc_tex(struct vc4_exec_info *exec, switch (tiling_format) { case VC4_TILING_FORMAT_T: - aligned_width = roundup(level_width, utile_w * 8); - aligned_height = roundup(level_height, utile_h * 8); + aligned_width = round_up(level_width, utile_w * 8); + aligned_height = round_up(level_height, utile_h * 8); break; case VC4_TILING_FORMAT_LT: - aligned_width = 
roundup(level_width, utile_w); - aligned_height = roundup(level_height, utile_h); + aligned_width = round_up(level_width, utile_w); + aligned_height = round_up(level_height, utile_h); break; default: - aligned_width = roundup(level_width, utile_w); + aligned_width = round_up(level_width, utile_w); aligned_height = level_height; break; } diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h index 1f0c6b67c0f..a1903269a20 100644 --- a/src/gallium/drivers/vc4/vc4_simulator_validate.h +++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h @@ -43,6 +43,7 @@ struct vc4_exec_info; #define kfree(ptr) free(ptr) #define krealloc(ptr, size, args) realloc(ptr, size) #define roundup(x, y) align(x, y) +#define round_up(x, y) align(x, y) #define max(x, y) MAX2(x, y) #define min(x, y) MiN2(x, y) #define BUG_ON(condition) assert(!(condition)) From 8d10b2a0460ca01a5c65a87184717c6c2e4bcaeb Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 3 Jun 2015 10:18:04 -0700 Subject: [PATCH 537/834] vc4: Drop subdirectory in vc4 build. Just because we put the source in a subdir, doesn't mean we need helper libraries in the build. This will also simplify the Android build setup. 
--- configure.ac | 1 - src/gallium/drivers/vc4/Makefile.am | 4 +- src/gallium/drivers/vc4/Makefile.sources | 4 ++ src/gallium/drivers/vc4/kernel/Makefile.am | 40 ------------------- .../drivers/vc4/kernel/Makefile.sources | 6 --- 5 files changed, 5 insertions(+), 50 deletions(-) delete mode 100644 src/gallium/drivers/vc4/kernel/Makefile.am delete mode 100644 src/gallium/drivers/vc4/kernel/Makefile.sources diff --git a/configure.ac b/configure.ac index d32aa2492cc..be0cd7dd7d9 100644 --- a/configure.ac +++ b/configure.ac @@ -2350,7 +2350,6 @@ AC_CONFIG_FILES([Makefile src/gallium/drivers/svga/Makefile src/gallium/drivers/trace/Makefile src/gallium/drivers/vc4/Makefile - src/gallium/drivers/vc4/kernel/Makefile src/gallium/state_trackers/clover/Makefile src/gallium/state_trackers/dri/Makefile src/gallium/state_trackers/glx/xlib/Makefile diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am index 3fc591f10c1..774463138d0 100644 --- a/src/gallium/drivers/vc4/Makefile.am +++ b/src/gallium/drivers/vc4/Makefile.am @@ -19,8 +19,6 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
-SUBDIRS = kernel - include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc @@ -39,5 +37,5 @@ AM_CFLAGS = \ noinst_LTLIBRARIES = libvc4.la libvc4_la_SOURCES = $(C_SOURCES) -libvc4_la_LIBADD = $(SIM_LIB) kernel/libvc4_kernel.la +libvc4_la_LIBADD = $(SIM_LIB) libvc4_la_LDFLAGS = $(SIM_LDFLAGS) diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index 49474df3548..f678b2fc0d3 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -1,4 +1,8 @@ C_SOURCES := \ + kernel/vc4_drv.h \ + kernel/vc4_gem.c \ + kernel/vc4_validate.c \ + kernel/vc4_validate_shaders.c \ vc4_blit.c \ vc4_bufmgr.c \ vc4_bufmgr.h \ diff --git a/src/gallium/drivers/vc4/kernel/Makefile.am b/src/gallium/drivers/vc4/kernel/Makefile.am deleted file mode 100644 index 1ae5f1c2e83..00000000000 --- a/src/gallium/drivers/vc4/kernel/Makefile.am +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright © 2014 Broadcom -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -include Makefile.sources -include $(top_srcdir)/src/gallium/Automake.inc - -if USE_VC4_SIMULATOR -SIM_CFLAGS = -DUSE_VC4_SIMULATOR=1 -endif - -AM_CFLAGS = \ - $(LIBDRM_CFLAGS) \ - $(GALLIUM_DRIVER_CFLAGS) \ - $(SIM_CFLAGS) \ - -I$(top_srcdir)/src/mesa/ \ - -I$(srcdir)/../ \ - $() - -noinst_LTLIBRARIES = libvc4_kernel.la - -libvc4_kernel_la_SOURCES = $(C_SOURCES) -libvc4_kernel_la_LDFLAGS = $(SIM_LDFLAGS) diff --git a/src/gallium/drivers/vc4/kernel/Makefile.sources b/src/gallium/drivers/vc4/kernel/Makefile.sources deleted file mode 100644 index 7d17a898ebf..00000000000 --- a/src/gallium/drivers/vc4/kernel/Makefile.sources +++ /dev/null @@ -1,6 +0,0 @@ -C_SOURCES := \ - vc4_drv.h \ - vc4_gem.c \ - vc4_validate.c \ - vc4_validate_shaders.c \ - $() From 9dca3beb62e894bbd720c5eecb47c0fd2c6132f9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Jun 2015 12:02:02 -0700 Subject: [PATCH 538/834] vc4: Drop qir include from vc4_screen.h We didn't need any of it except for the list header, and qir.h pulls in nir.h, which is not really interesting to winsys. 
--- src/gallium/drivers/vc4/vc4_screen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h index 46c4687a3b9..fb08cc1c7a0 100644 --- a/src/gallium/drivers/vc4/vc4_screen.h +++ b/src/gallium/drivers/vc4/vc4_screen.h @@ -27,7 +27,7 @@ #include "pipe/p_screen.h" #include "os/os_thread.h" #include "state_tracker/drm_driver.h" -#include "vc4_qir.h" +#include "util/list.h" struct vc4_bo; From c6877c9e5983287a0741b26a358b7d744aebe232 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 10 Jun 2015 13:26:56 +1000 Subject: [PATCH 539/834] nouveau: set imported buffers to what the kernel gives us When we import a dma-buf fd from another driver the kernel gives us the right info, and this trashes it. Convert the kernel bo flags into the domain flags. This helps getting reverse prime and glamor working. Cc: mesa-stable@lists.freedesktop.org Acked-by: Ben Skeggs Signed-off-by: Dave Airlie --- src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c index 744a3a5bf8b..10cebb17eee 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c @@ -419,7 +419,7 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen, FREE(mt); return NULL; } - mt->base.domain = NOUVEAU_BO_VRAM; + mt->base.domain = mt->base.bo->flags & NOUVEAU_BO_APER; mt->base.address = mt->base.bo->offset; mt->base.base = *templ; From 563706c14641fde2ab604d590b5425680354f280 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 10 Jun 2015 13:51:59 +1000 Subject: [PATCH 540/834] st/dri: check pscreen is valid before querying param MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't check the validity of pscreen until dri_init_screen_helper. Hit this trying to init glamor on 
a device with no driver (udl). Acked-by: Michel Dänzer Signed-off-by: Dave Airlie --- src/gallium/state_trackers/dri/dri2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index 792d5651698..8d93f786433 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -1483,7 +1483,7 @@ dri2_init_screen(__DRIscreen * sPriv) } } - if (pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) { + if (pscreen && pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) { sPriv->extensions = dri_robust_screen_extensions; screen->has_reset_status_query = true; } From adee54f8269c5e9f4fde91d19f0e465afc8f14d8 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Tue, 9 Jun 2015 16:53:55 +1000 Subject: [PATCH 541/834] glsl: remove restriction on unsized arrays in GLSL ES 3.10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Tapani Pälli --- src/glsl/ast_to_hir.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 8aebb1320f1..aab0c290c07 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -3849,7 +3849,15 @@ ast_declarator_list::hir(exec_list *instructions, decl->identifier); } - if (state->es_shader) { + /* GLSL ES 3.10 removes the restriction on unsized arrays. + * + * Section 4.1.9 (Arrays) of the GLSL ES 3.10 spec says: + * + * "Variables of the same type can be aggregated into arrays by + * declaring a name followed by brackets ([ ]) enclosing an + * optional size." + */ + if (state->es_shader && state->language_version < 310) { const glsl_type *const t = (earlier == NULL) ? 
var->type : earlier->type; From 07e4f12e66f64c8075c0d3fd1c23cbd7c657970c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Mon, 8 Jun 2015 14:53:26 +0300 Subject: [PATCH 542/834] mesa: allow unsized formats GL_RG, GL_RED for GLES 3.0 with half float MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: && -> ||, we enable on gles3 or if ARB_texture_rg is enabled Signed-off-by: Tapani Pälli Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90748 Reviewed-by: Kenneth Graunke --- src/mesa/main/glformats.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index 6a77c916a9c..057a5d160b4 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -1678,6 +1678,10 @@ _mesa_error_check_format_and_type(const struct gl_context *ctx, case GL_LUMINANCE: case GL_ALPHA: return GL_NO_ERROR; + case GL_RG: + case GL_RED: + if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_texture_rg) + return GL_NO_ERROR; default: return GL_INVALID_OPERATION; } From 5b0d6f5c1bc3f7bd37c6efebf48f80ca6ff3ef87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Tue, 9 Jun 2015 12:26:48 +0300 Subject: [PATCH 543/834] mesa: add GL_RED, GL_RG support for floating point textures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa supports EXT_texture_rg and OES_texture_float. This patch adds support for using unsized enums GL_RED and GL_RG for floating point targets and writes proper checks for internalformat when format is GL_RED or GL_RG and type is of GL_FLOAT or GL_HALF_FLOAT. Later, internalformat will get adjusted by adjust_for_oes_float_texture after these checks. 
v2: simplify to check vs supported enums v3: follow the style and break out if internalFormat ok (Kenneth) Signed-off-by: Tapani Pälli Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90748 Reviewed-by: Kenneth Graunke --- src/mesa/main/glformats.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index 057a5d160b4..ac69fabccaa 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -2296,8 +2296,18 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx, break; case GL_HALF_FLOAT: - if (internalFormat != GL_RG16F) - return GL_INVALID_OPERATION; + case GL_HALF_FLOAT_OES: + switch (internalFormat) { + case GL_RG16F: + break; + case GL_RG: + if (ctx->Extensions.ARB_texture_rg && + ctx->Extensions.OES_texture_half_float) + break; + /* fallthrough */ + default: + return GL_INVALID_OPERATION; + } break; case GL_FLOAT: @@ -2305,6 +2315,11 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx, case GL_RG16F: case GL_RG32F: break; + case GL_RG: + if (ctx->Extensions.ARB_texture_rg && + ctx->Extensions.OES_texture_float) + break; + /* fallthrough */ default: return GL_INVALID_OPERATION; } @@ -2365,8 +2380,19 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx, break; case GL_HALF_FLOAT: - if (internalFormat != GL_R16F) + case GL_HALF_FLOAT_OES: + switch (internalFormat) { + case GL_R16F: + break; + case GL_RG: + case GL_RED: + if (ctx->Extensions.ARB_texture_rg && + ctx->Extensions.OES_texture_half_float) + break; + /* fallthrough */ + default: return GL_INVALID_OPERATION; + } break; case GL_FLOAT: @@ -2374,6 +2400,11 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx, case GL_R16F: case GL_R32F: break; + case GL_RED: + if (ctx->Extensions.ARB_texture_rg && + ctx->Extensions.OES_texture_float) + break; + /* fallthrough */ default: return GL_INVALID_OPERATION; } From 
7217faf39f63f81b74f268d62fbdd94d445b0e6f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 10 Jun 2015 07:28:40 -0600 Subject: [PATCH 544/834] llvmpipe: simplify lp_resource_copy() Just implement it in terms of util_resource_copy_region(). Both the original code and util_resource_copy_region() boil down to mapping, calling util_copy_box() and unmapping. No piglit regressions. This will also help to implement GL_ARB_copy_image. Reviewed-by: Jose Fonseca --- src/gallium/drivers/llvmpipe/lp_surface.c | 61 +---------------------- 1 file changed, 2 insertions(+), 59 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index b985877e43e..96f8ed82cd8 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -42,13 +42,6 @@ lp_resource_copy(struct pipe_context *pipe, struct pipe_resource *src, unsigned src_level, const struct pipe_box *src_box) { - struct llvmpipe_resource *src_tex = llvmpipe_resource(src); - struct llvmpipe_resource *dst_tex = llvmpipe_resource(dst); - const enum pipe_format format = src_tex->base.format; - unsigned width = src_box->width; - unsigned height = src_box->height; - unsigned depth = src_box->depth; - llvmpipe_flush_resource(pipe, dst, dst_level, FALSE, /* read_only */ @@ -63,58 +56,8 @@ lp_resource_copy(struct pipe_context *pipe, FALSE, /* do_not_block */ "blit src"); - /* Fallback for buffers. 
*/ - if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { - util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); - return; - } - - /* - printf("surface copy from %u lvl %u to %u lvl %u: %u,%u,%u to %u,%u,%u %u x %u x %u\n", - src_tex->id, src_level, dst_tex->id, dst_level, - src_box->x, src_box->y, src_box->z, dstx, dsty, dstz, - src_box->width, src_box->height, src_box->depth); - */ - - /* make sure display target resources (which cannot have levels/layers) are mapped */ - if (src_tex->dt) - (void) llvmpipe_resource_map(src, src_level, 0, LP_TEX_USAGE_READ); - if (dst_tex->dt) - /* - * Could set this to WRITE_ALL if complete dst is covered but it gets - * ignored anyway. - */ - (void) llvmpipe_resource_map(dst, dst_level, 0, LP_TEX_USAGE_READ_WRITE); - - - /* copy */ - { - const ubyte *src_linear_ptr - = llvmpipe_get_texture_image_address(src_tex, src_box->z, - src_level); - ubyte *dst_linear_ptr - = llvmpipe_get_texture_image_address(dst_tex, dstz, - dst_level); - - if (dst_linear_ptr && src_linear_ptr) { - util_copy_box(dst_linear_ptr, format, - llvmpipe_resource_stride(&dst_tex->base, dst_level), - dst_tex->img_stride[dst_level], - dstx, dsty, 0, - width, height, depth, - src_linear_ptr, - llvmpipe_resource_stride(&src_tex->base, src_level), - src_tex->img_stride[src_level], - src_box->x, src_box->y, 0); - } - } - - if (src_tex->dt) - llvmpipe_resource_unmap(src, 0, 0); - if (dst_tex->dt) - llvmpipe_resource_unmap(dst, 0, 0); - + util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); } From f83b9e58f6e8a748def367c7d523eb7285b1aeb7 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 9 Jun 2015 14:33:47 -0700 Subject: [PATCH 545/834] i965: Momentarily pretend to support ARB_texture_stencil8 for blits. Broadwell's stencil blitting code attempts to bind a renderbuffer as a texture, using dd->BindRenderbufferTexImage(). 
This calls _mesa_init_teximage_fields(), which then attempts to set img->_BaseFormat = _mesa_base_tex_format(ctx, internalFormat), which assert fails if internalFormat is GL_STENCIL_INDEX8 but ARB_texture_stencil8 is unsupported. To work around this, just pretend to support the extension momentarily, during the blit. Meta has already munged a variety of other things in the context (including the API!), so it's not that much worse than what we're already doing. Fixes regressions since commit f7aad9da20b13c98f77d6a690b327716f39c0a47 (mesa/teximage: use correct extension for accept stencil texture.). v2: Add an XXX comment explaining the situation (requested by Jason Ekstrand and Martin Peres), and an assert that we don't support the extension so we remember to remove this hack (requested by Neil Roberts). Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c index fc7018d15b9..d079197a2a9 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c +++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c @@ -414,6 +414,12 @@ brw_meta_stencil_blit(struct brw_context *brw, GLenum target; _mesa_meta_fb_tex_blit_begin(ctx, &blit); + /* XXX: Pretend to support stencil textures so _mesa_base_tex_format() + * returns a valid format. When we properly support the extension, we + * should remove this. + */ + assert(ctx->Extensions.ARB_texture_stencil8 == false); + ctx->Extensions.ARB_texture_stencil8 = true; _mesa_GenFramebuffers(1, &fbo); /* Force the surface to be configured for level zero. 
*/ @@ -451,6 +457,7 @@ brw_meta_stencil_blit(struct brw_context *brw, _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); error: + ctx->Extensions.ARB_texture_stencil8 = false; _mesa_meta_fb_tex_blit_end(ctx, target, &blit); _mesa_meta_end(ctx); From f9a18acb56c69b24c1e47cd326dc98e14fadcf94 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 10 Jun 2015 09:07:32 +0200 Subject: [PATCH 546/834] i965: do not round line width when multisampling or antialiasing are enabled In commit fe74fee8fa721a we rounded the line width to the nearest integer to match the GLES3 spec requirements stated in section 13.4.2.1, but that seems to break a dEQP test that renders wide lines in some multisampling scenarios. Ian noted that the OpenGL 4.4 spec has the following similar text: "The actual width of non-antialiased lines is determined by rounding the supplied width to the nearest integer, then clamping it to the implementation-dependent maximum non-antialiased line width." and suggested that when ES removed antialiased lines, they removed "non-antialiased" from that paragraph but probably should not have. Going by that note, this patch restricts the quantization implemented in fe74fee8fa721a only to regular aliased lines. This seems to keep the tests fixed with that commit passing while fixing the broken test. 
v2: - Drop one of the clamps (Ken, Marius) - Add a rule to prevent advertising line widths that when rounded go beyond the limits allowed by the hardware (Ken) - Update comments in the code accordingly (Ian) - Put the code in a utility function (Ian) Fixes: dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90749 Reviewed-by: Kenneth Graunke Reviewed-by: Ian Romanick Cc: "10.6" --- src/mesa/drivers/dri/i965/brw_context.c | 7 +++++++ src/mesa/drivers/dri/i965/brw_util.h | 15 +++++++++++++++ src/mesa/drivers/dri/i965/gen6_sf_state.c | 6 +----- src/mesa/drivers/dri/i965/gen7_sf_state.c | 6 +----- src/mesa/drivers/dri/i965/gen8_sf_state.c | 6 +----- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 652d9a34e8f..ab047046fdb 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -442,6 +442,13 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.LineWidthGranularity = 0.5; } + /* For non-antialiased lines, we have to round the line width to the + * nearest whole number. Make sure that we don't advertise a line + * width that, when rounded, will be beyond the actual hardware + * maximum. 
+ */ + assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth); + ctx->Const.MinPointSize = 1.0; ctx->Const.MinPointSizeAA = 1.0; ctx->Const.MaxPointSize = 255.0; diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index b548d234538..671d72e1cc7 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -35,9 +35,24 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "brw_context.h" extern GLuint brw_translate_blend_factor( GLenum factor ); extern GLuint brw_translate_blend_equation( GLenum mode ); extern GLenum brw_fix_xRGB_alpha(GLenum function); +static inline float +brw_get_line_width(struct brw_context *brw) +{ + /* From the OpenGL 4.4 spec: + * + * "The actual width of non-antialiased lines is determined by rounding + * the supplied width to the nearest integer, then clamping it to the + * implementation-dependent maximum non-antialiased line width." + */ + return CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag + ? 
roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width, + 0.0, brw->ctx.Const.MaxLineWidth); +} + #endif diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index e445ce25600..d5777647f7e 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -361,11 +361,7 @@ upload_sf_state(struct brw_context *brw) /* _NEW_LINE */ { - /* OpenGL dictates that line width should be rounded to the nearest - * integer - */ - float line_width = - roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth)); + float line_width = brw_get_line_width(brw); uint32_t line_width_u3_7 = U_FIXED(line_width, 7); /* Line width of 0 is not allowed when MSAA enabled */ diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 58e33370c57..87ff284e31c 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -192,11 +192,7 @@ upload_sf_state(struct brw_context *brw) /* _NEW_LINE */ { - /* OpenGL dictates that line width should be rounded to the nearest - * integer - */ - float line_width = - roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth)); + float line_width = brw_get_line_width(brw); uint32_t line_width_u3_7 = U_FIXED(line_width, 7); /* Line width of 0 is not allowed when MSAA enabled */ if (ctx->Multisample._Enabled) { diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c index 52a21b6a8e8..83ef62bc961 100644 --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c @@ -154,11 +154,7 @@ upload_sf(struct brw_context *brw) dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; /* _NEW_LINE */ - /* OpenGL dictates that line width should be rounded to the nearest - * integer - */ - float line_width = - roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth)); + float line_width = brw_get_line_width(brw); uint32_t line_width_u3_7 = 
U_FIXED(line_width, 7); if (line_width_u3_7 == 0) line_width_u3_7 = 1; From bd38f91f8d80897ca91979962d80d4bc0acef586 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Tue, 9 Jun 2015 20:58:22 +0300 Subject: [PATCH 547/834] i965: do_blit_drawpixels: decode array formats Correct a regression introduced by commit 922c0c9fd526 by converting "array format", if received from _mesa_format_from_format_and_type, to mesa_format. References: https://bugs.freedesktop.org/show_bug.cgi?id=90839 Signed-off-by: Alexander Monakov Tested-by: AnAkkk Reviewed-by: Jason Ekstrand Reviewed-by: Kenneth Graunke Cc: mesa-stable@lists.freedesktop.org --- src/mesa/drivers/dri/i965/intel_pixel_draw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c index d68cbb6e401..189a592d8d2 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c @@ -78,6 +78,8 @@ do_blit_drawpixels(struct gl_context * ctx, struct intel_renderbuffer *irb = intel_renderbuffer(rb); mesa_format src_format = _mesa_format_from_format_and_type(format, type); + if (_mesa_format_is_mesa_array_format(src_format)) + src_format = _mesa_format_from_array_format(src_format); mesa_format dst_format = irb->mt->format; /* We can safely discard sRGB encode/decode for the DrawPixels interface */ From fd00c738c08e54c9dfdc195e59f780f30d2f9e07 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Fri, 20 Mar 2015 15:13:14 +0200 Subject: [PATCH 548/834] mesa/main: Remove _mesa_HashClone() I didn't find this being used anywhere. 
Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Timothy Arceri Reviewed-by: Anuj Phogat --- src/mesa/main/hash.c | 28 ---------------------------- src/mesa/main/hash.h | 3 --- 2 files changed, 31 deletions(-) diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c index d04cccd94d2..315b5d64004 100644 --- a/src/mesa/main/hash.c +++ b/src/mesa/main/hash.c @@ -388,34 +388,6 @@ _mesa_HashDeleteAll(struct _mesa_HashTable *table, } -/** - * Clone all entries in a hash table, into a new table. - * - * \param table the hash table to clone - */ -struct _mesa_HashTable * -_mesa_HashClone(const struct _mesa_HashTable *table) -{ - /* cast-away const */ - struct _mesa_HashTable *table2 = (struct _mesa_HashTable *) table; - struct hash_entry *entry; - struct _mesa_HashTable *clonetable; - - assert(table); - mtx_lock(&table2->Mutex); - - clonetable = _mesa_NewHashTable(); - assert(clonetable); - hash_table_foreach(table->ht, entry) { - _mesa_HashInsert(clonetable, (GLint)(uintptr_t)entry->key, entry->data); - } - - mtx_unlock(&table2->Mutex); - - return clonetable; -} - - /** * Walk over all entries in a hash table, calling callback function for each. 
* Note: we use a separate mutex in this function to avoid a recursive diff --git a/src/mesa/main/hash.h b/src/mesa/main/hash.h index e3e8f492e8b..da3b9973d24 100644 --- a/src/mesa/main/hash.h +++ b/src/mesa/main/hash.h @@ -59,9 +59,6 @@ _mesa_HashDeleteAll(struct _mesa_HashTable *table, void (*callback)(GLuint key, void *data, void *userData), void *userData); -extern struct _mesa_HashTable * -_mesa_HashClone(const struct _mesa_HashTable *table); - extern void _mesa_HashWalk(const struct _mesa_HashTable *table, void (*callback)(GLuint key, void *data, void *userData), From 56e9f3b493a8677e60e4473ca0faf0e3d1a79888 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Fri, 20 Mar 2015 15:40:26 +0200 Subject: [PATCH 549/834] mesa/main: avoid null access in format_array_table_init() If _mesa_hash_table_create failed we'd get null pointer. Report error and go away. Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Anuj Phogat --- src/mesa/main/formats.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index 8af44e90520..f7c94024f65 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -397,6 +397,11 @@ format_array_format_table_init(void) format_array_format_table = _mesa_hash_table_create(NULL, NULL, array_formats_equal); + if (!format_array_format_table) { + _mesa_error_no_memory(__func__); + return; + } + for (f = 1; f < MESA_FORMAT_COUNT; ++f) { info = _mesa_get_format_info(f); if (!info->ArrayFormat) @@ -432,6 +437,11 @@ _mesa_format_from_array_format(uint32_t array_format) call_once(&format_array_format_table_exists, format_array_format_table_init); + if (!format_array_format_table) { + format_array_format_table_exists = ONCE_FLAG_INIT; + return MESA_FORMAT_NONE; + } + entry = _mesa_hash_table_search_pre_hashed(format_array_format_table, array_format, (void *)(intptr_t)array_format); From 83624c141d3568217190933945c3243913e7ba2c Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Wed, 6 May 2015 13:43:54 +0300 Subject: [PATCH 550/834] mesa/es3.1: enable DRAW_INDIRECT_BUFFER_BINDING for gles3.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (increases ES31-CTS.draw_indirect.basic.* passing tests) v2: only expose DRAW_INDIRECT_BUFFER_BINDING for GL core + ES3.1 Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres Reviewed-by: Ian Romanick --- src/mesa/main/get.c | 12 ++++++++++++ src/mesa/main/get_hash_params.py | 8 ++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 8a6c81aff87..1bc9b5d82cf 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -138,6 +138,7 @@ enum value_extra { EXTRA_API_GL_CORE, EXTRA_API_ES2, EXTRA_API_ES3, + EXTRA_API_ES31, EXTRA_NEW_BUFFERS, EXTRA_NEW_FRAG_CLAMP, EXTRA_VALID_DRAW_BUFFER, @@ -348,6 +349,12 @@ static const int extra_ARB_shader_image_load_store_and_geometry_shader[] = { EXTRA_END }; +static const int extra_ARB_draw_indirect_es31[] = { + EXT(ARB_draw_indirect), + EXTRA_API_ES31, + EXTRA_END +}; + EXTRA_EXT(ARB_texture_cube_map); EXTRA_EXT(EXT_texture_array); EXTRA_EXT(NV_fog_distance); @@ -1078,6 +1085,11 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d if (_mesa_is_gles3(ctx)) api_found = GL_TRUE; break; + case EXTRA_API_ES31: + api_check = GL_TRUE; + if (_mesa_is_gles31(ctx)) + api_found = GL_TRUE; + break; case EXTRA_API_GL: api_check = GL_TRUE; if (_mesa_is_desktop_gl(ctx)) diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 41cb2c17b60..513d5d21b3f 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -409,6 +409,12 @@ descriptor=[ [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ], ]}, +# Enums in OpenGL Core profile and ES 3.1 +{ "apis": ["GL_CORE", "GLES3"], "params": [ +# GL_ARB_draw_indirect / GLES 3.1 + [ 
"DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect_es31" ], +]}, + # Remaining enums are only in OpenGL { "apis": ["GL", "GL_CORE"], "params": [ [ "ACCUM_RED_BITS", "BUFFER_INT(Visual.accumRedBits), NO_EXTRA" ], @@ -804,8 +810,6 @@ descriptor=[ { "apis": ["GL_CORE"], "params": [ # GL_ARB_texture_buffer_range [ "TEXTURE_BUFFER_OFFSET_ALIGNMENT", "CONTEXT_INT(Const.TextureBufferOffsetAlignment), extra_ARB_texture_buffer_range" ], -# GL_ARB_draw_indirect - [ "DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect" ], # GL_ARB_viewport_array [ "MAX_VIEWPORTS", "CONTEXT_INT(Const.MaxViewports), extra_ARB_viewport_array" ], From 5b61cb12366f65a5d7e21b47fa3501a03fd884ee Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Tue, 26 May 2015 15:32:21 +0300 Subject: [PATCH 551/834] glsl: fix constructing a vector from a matrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this patch, the following constructs (not an extensive list) would crash mesa: - mat2 foo = mat2(1); vec4 bar = vec4(foo); - mat3 foo = mat3(1); vec4 bar = vec4(foo); - mat3 foo = mat3(1); ivec4 bar = ivec4(foo); The first case is explicitly allowed by the GLSL spec, as seen on page 101 of the GLSL 4.40 spec: "vec4(mat2) // the vec4 is column 0 followed by column 1" The other cases are implicitly allowed also. The actual changes are quite minimal. We first split each column of the matrix to a list of vectors and then use them to initialize the vector. An additional check to make sure that we are not trying to copy 0 elements of a vector fixes the (i)vec4(mat3) case as the last vector (3rd column) is not needed at all. 
Reviewed-by: Tapani Pälli Signed-off-by: Martin Peres --- src/glsl/ast_function.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 1e77124bd15..92e26bf2416 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -993,11 +993,15 @@ emit_inline_vector_constructor(const glsl_type *type, ir_variable *var = new(ctx) ir_variable(type, "vec_ctor", ir_var_temporary); instructions->push_tail(var); - /* There are two kinds of vector constructors. + /* There are three kinds of vector constructors. * * - Construct a vector from a single scalar by replicating that scalar to * all components of the vector. * + * - Construct a vector from at least a matrix. This case should already + * have been taken care of in ast_function_expression::hir by breaking + * down the matrix into a series of column vectors. + * * - Construct a vector from an arbirary combination of vectors and * scalars. The components of the constructor parameters are assigned * to the vector in order until the vector is full. @@ -1091,6 +1095,14 @@ emit_inline_vector_constructor(const glsl_type *type, rhs_components = lhs_components - base_component; } + /* If we do not have any components left to copy, break out of the + * loop. This can happen when initializing a vec4 with a mat3 as the + * mat3 would have been broken into a series of column vectors. + */ + if (rhs_components == 0) { + break; + } + const ir_constant *const c = param->as_constant(); if (c == NULL) { /* Mask of fields to be written in the assignment. @@ -1681,11 +1693,11 @@ ast_function_expression::hir(exec_list *instructions, return ir_rvalue::error_value(ctx); } - /* Later, we cast each parameter to the same base type as the - * constructor. Since there are no non-floating point matrices, we - * need to break them up into a series of column vectors. 
+ /* Matrices can never be consumed as is by any constructor but matrix + * constructors. If the constructor type is not matrix, always break the + * matrix up into a series of column vectors. */ - if (constructor_type->base_type != GLSL_TYPE_FLOAT) { + if (!constructor_type->is_matrix()) { foreach_in_list_safe(ir_rvalue, matrix, &actual_parameters) { if (!matrix->type->is_matrix()) continue; From 0f1fe649b7fdfb3ab8c7b14e642bc0e3831fc092 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 11 Jun 2015 08:49:46 +0200 Subject: [PATCH 552/834] i965/gen8: Fix antialiased line rendering with width < 1.5 The same fix Marius implemented for gen6 (commit a9b04d8a) and gen7 (commit 24ecf37a). Also, we need the same code to handle special cases of line width in gen6, gen7 and now gen8, so put that in the helper function we use to compute the line width. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_util.h | 31 ++++++++++++++++++++--- src/mesa/drivers/dri/i965/gen6_sf_state.c | 22 +--------------- src/mesa/drivers/dri/i965/gen7_sf_state.c | 21 +-------------- src/mesa/drivers/dri/i965/gen8_sf_state.c | 5 +--- 4 files changed, 30 insertions(+), 49 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index 671d72e1cc7..04e4e944118 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -41,7 +41,7 @@ extern GLuint brw_translate_blend_factor( GLenum factor ); extern GLuint brw_translate_blend_equation( GLenum mode ); extern GLenum brw_fix_xRGB_alpha(GLenum function); -static inline float +static inline uint32_t brw_get_line_width(struct brw_context *brw) { /* From the OpenGL 4.4 spec: @@ -50,9 +50,32 @@ brw_get_line_width(struct brw_context *brw) * the supplied width to the nearest integer, then clamping it to the * implementation-dependent maximum non-antialiased line width." */ - return CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag - ? 
roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width, - 0.0, brw->ctx.Const.MaxLineWidth); + float line_width = + CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag + ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width, + 0.0, brw->ctx.Const.MaxLineWidth); + uint32_t line_width_u3_7 = U_FIXED(line_width, 7); + + /* Line width of 0 is not allowed when MSAA enabled */ + if (brw->ctx.Multisample._Enabled) { + if (line_width_u3_7 == 0) + line_width_u3_7 = 1; + } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5) { + /* For 1 pixel line thickness or less, the general + * anti-aliasing algorithm gives up, and a garbage line is + * generated. Setting a Line Width of 0.0 specifies the + * rasterization of the "thinnest" (one-pixel-wide), + * non-antialiased lines. + * + * Lines rendered with zero Line Width are rasterized using + * Grid Intersection Quantization rules as specified by + * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line + * Rasterization. + */ + line_width_u3_7 = 0; + } + + return line_width_u3_7; } #endif diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index d5777647f7e..5809628e021 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -361,27 +361,7 @@ upload_sf_state(struct brw_context *brw) /* _NEW_LINE */ { - float line_width = brw_get_line_width(brw); - uint32_t line_width_u3_7 = U_FIXED(line_width, 7); - - /* Line width of 0 is not allowed when MSAA enabled */ - if (ctx->Multisample._Enabled) { - if (line_width_u3_7 == 0) - line_width_u3_7 = 1; - } else if (ctx->Line.SmoothFlag && ctx->Line.Width < 1.5) { - /* For 1 pixel line thickness or less, the general - * anti-aliasing algorithm gives up, and a garbage line is - * generated. Setting a Line Width of 0.0 specifies the - * rasterization of the "thinnest" (one-pixel-wide), - * non-antialiased lines. 
- * - * Lines rendered with zero Line Width are rasterized using - * Grid Intersection Quantization rules as specified by - * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line - * Rasterization. - */ - line_width_u3_7 = 0; - } + uint32_t line_width_u3_7 = brw_get_line_width(brw); dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT; } if (ctx->Line.SmoothFlag) { diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 87ff284e31c..a20967caf5c 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -192,26 +192,7 @@ upload_sf_state(struct brw_context *brw) /* _NEW_LINE */ { - float line_width = brw_get_line_width(brw); - uint32_t line_width_u3_7 = U_FIXED(line_width, 7); - /* Line width of 0 is not allowed when MSAA enabled */ - if (ctx->Multisample._Enabled) { - if (line_width_u3_7 == 0) - line_width_u3_7 = 1; - } else if (ctx->Line.SmoothFlag && ctx->Line.Width < 1.5) { - /* For 1 pixel line thickness or less, the general - * anti-aliasing algorithm gives up, and a garbage line is - * generated. Setting a Line Width of 0.0 specifies the - * rasterization of the "thinnest" (one-pixel-wide), - * non-antialiased lines. - * - * Lines rendered with zero Line Width are rasterized using - * Grid Intersection Quantization rules as specified by - * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line - * Rasterization. 
- */ - line_width_u3_7 = 0; - } + uint32_t line_width_u3_7 = brw_get_line_width(brw); dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT; } if (ctx->Line.SmoothFlag) { diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c index 83ef62bc961..c2b585d0001 100644 --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c @@ -154,10 +154,7 @@ upload_sf(struct brw_context *brw) dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; /* _NEW_LINE */ - float line_width = brw_get_line_width(brw); - uint32_t line_width_u3_7 = U_FIXED(line_width, 7); - if (line_width_u3_7 == 0) - line_width_u3_7 = 1; + uint32_t line_width_u3_7 = brw_get_line_width(brw); if (brw->gen >= 9 || brw->is_cherryview) { dw1 |= line_width_u3_7 << GEN9_SF_LINE_WIDTH_SHIFT; } else { From 9fed4f9bf5146af1fcd093422b39353845f2267a Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Thu, 11 Jun 2015 13:32:21 +0100 Subject: [PATCH 553/834] mesa/main: Don't use ONCE_FLAG_INIT as a r-value. It should only be used as an initializer expression. Trivial, and fixes Windows builds. Nevertheless, overwriting an once_flag like this seems dangerous and should be revised. 
--- src/mesa/main/formats.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index f7c94024f65..baeb1bfe5de 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -438,7 +438,8 @@ _mesa_format_from_array_format(uint32_t array_format) call_once(&format_array_format_table_exists, format_array_format_table_init); if (!format_array_format_table) { - format_array_format_table_exists = ONCE_FLAG_INIT; + static const once_flag once_flag_init = ONCE_FLAG_INIT; + format_array_format_table_exists = once_flag_init; return MESA_FORMAT_NONE; } From 1a6e4f46ed117b393e26aff326e5b05d4aea7fb0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 10 Jun 2015 10:59:37 -0600 Subject: [PATCH 554/834] gallium: remove explicit values from PIPE_CAP_ enums The other PIPE_CAPF_ and PIPE_SHADER_CAP_ enums don't have explicit values. Reviewed-by: Roland Scheidegger Reviewed-by: Jose Fonseca --- src/gallium/include/pipe/p_defines.h | 196 +++++++++++++-------------- 1 file changed, 98 insertions(+), 98 deletions(-) diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index a077029725c..88b7b7699c1 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -505,106 +505,106 @@ enum pipe_reset_status */ enum pipe_cap { - PIPE_CAP_NPOT_TEXTURES = 1, - PIPE_CAP_TWO_SIDED_STENCIL = 2, - PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS = 4, - PIPE_CAP_ANISOTROPIC_FILTER = 5, - PIPE_CAP_POINT_SPRITE = 6, - PIPE_CAP_MAX_RENDER_TARGETS = 7, - PIPE_CAP_OCCLUSION_QUERY = 8, - PIPE_CAP_QUERY_TIME_ELAPSED = 9, - PIPE_CAP_TEXTURE_SHADOW_MAP = 10, - PIPE_CAP_TEXTURE_SWIZZLE = 11, - PIPE_CAP_MAX_TEXTURE_2D_LEVELS = 12, - PIPE_CAP_MAX_TEXTURE_3D_LEVELS = 13, - PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS = 14, - PIPE_CAP_TEXTURE_MIRROR_CLAMP = 25, - PIPE_CAP_BLEND_EQUATION_SEPARATE = 28, - PIPE_CAP_SM3 = 29, /*< Shader Model, supported */ - PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS = 
30, - PIPE_CAP_PRIMITIVE_RESTART = 31, + PIPE_CAP_NPOT_TEXTURES, + PIPE_CAP_TWO_SIDED_STENCIL, + PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS, + PIPE_CAP_ANISOTROPIC_FILTER, + PIPE_CAP_POINT_SPRITE, + PIPE_CAP_MAX_RENDER_TARGETS, + PIPE_CAP_OCCLUSION_QUERY, + PIPE_CAP_QUERY_TIME_ELAPSED, + PIPE_CAP_TEXTURE_SHADOW_MAP, + PIPE_CAP_TEXTURE_SWIZZLE, + PIPE_CAP_MAX_TEXTURE_2D_LEVELS, + PIPE_CAP_MAX_TEXTURE_3D_LEVELS, + PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS, + PIPE_CAP_TEXTURE_MIRROR_CLAMP, + PIPE_CAP_BLEND_EQUATION_SEPARATE, + PIPE_CAP_SM3, + PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS, + PIPE_CAP_PRIMITIVE_RESTART, /** blend enables and write masks per rendertarget */ - PIPE_CAP_INDEP_BLEND_ENABLE = 33, + PIPE_CAP_INDEP_BLEND_ENABLE, /** different blend funcs per rendertarget */ - PIPE_CAP_INDEP_BLEND_FUNC = 34, - PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS = 36, - PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT = 37, - PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT = 38, - PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER = 39, - PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER = 40, - PIPE_CAP_DEPTH_CLIP_DISABLE = 41, - PIPE_CAP_SHADER_STENCIL_EXPORT = 42, - PIPE_CAP_TGSI_INSTANCEID = 43, - PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR = 44, - PIPE_CAP_FRAGMENT_COLOR_CLAMPED = 45, - PIPE_CAP_MIXED_COLORBUFFER_FORMATS = 46, - PIPE_CAP_SEAMLESS_CUBE_MAP = 47, - PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE = 48, - PIPE_CAP_MIN_TEXEL_OFFSET = 50, - PIPE_CAP_MAX_TEXEL_OFFSET = 51, - PIPE_CAP_CONDITIONAL_RENDER = 52, - PIPE_CAP_TEXTURE_BARRIER = 53, - PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS = 55, - PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS = 56, - PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME = 57, - PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS = 59, /* temporary */ - PIPE_CAP_VERTEX_COLOR_UNCLAMPED = 60, - PIPE_CAP_VERTEX_COLOR_CLAMPED = 61, - PIPE_CAP_GLSL_FEATURE_LEVEL = 62, - PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION = 63, - PIPE_CAP_USER_VERTEX_BUFFERS = 64, - PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY = 65, - 
PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY = 66, - PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY = 67, - PIPE_CAP_COMPUTE = 68, - PIPE_CAP_USER_INDEX_BUFFERS = 69, - PIPE_CAP_USER_CONSTANT_BUFFERS = 70, - PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT = 71, - PIPE_CAP_START_INSTANCE = 72, - PIPE_CAP_QUERY_TIMESTAMP = 73, - PIPE_CAP_TEXTURE_MULTISAMPLE = 74, - PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT = 75, - PIPE_CAP_CUBE_MAP_ARRAY = 76, - PIPE_CAP_TEXTURE_BUFFER_OBJECTS = 77, - PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT = 78, - PIPE_CAP_TGSI_TEXCOORD = 79, - PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER = 80, - PIPE_CAP_QUERY_PIPELINE_STATISTICS = 81, - PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK = 82, - PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE = 83, - PIPE_CAP_MAX_VIEWPORTS = 84, - PIPE_CAP_ENDIANNESS = 85, - PIPE_CAP_MIXED_FRAMEBUFFER_SIZES = 86, - PIPE_CAP_TGSI_VS_LAYER_VIEWPORT = 87, - PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES = 88, - PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS = 89, - PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 90, - PIPE_CAP_TEXTURE_GATHER_SM5 = 91, - PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT = 92, - PIPE_CAP_FAKE_SW_MSAA = 93, - PIPE_CAP_TEXTURE_QUERY_LOD = 94, - PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET = 95, - PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET = 96, - PIPE_CAP_SAMPLE_SHADING = 97, - PIPE_CAP_TEXTURE_GATHER_OFFSETS = 98, - PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION = 99, - PIPE_CAP_MAX_VERTEX_STREAMS = 100, - PIPE_CAP_DRAW_INDIRECT = 101, - PIPE_CAP_TGSI_FS_FINE_DERIVATIVE = 102, - PIPE_CAP_VENDOR_ID = 103, - PIPE_CAP_DEVICE_ID = 104, - PIPE_CAP_ACCELERATED = 105, - PIPE_CAP_VIDEO_MEMORY = 106, - PIPE_CAP_UMA = 107, - PIPE_CAP_CONDITIONAL_RENDER_INVERTED = 108, - PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE = 109, - PIPE_CAP_SAMPLER_VIEW_TARGET = 110, - PIPE_CAP_CLIP_HALFZ = 111, - PIPE_CAP_VERTEXID_NOBASE = 112, - PIPE_CAP_POLYGON_OFFSET_CLAMP = 113, - PIPE_CAP_MULTISAMPLE_Z_RESOLVE = 114, - PIPE_CAP_RESOURCE_FROM_USER_MEMORY = 115, - PIPE_CAP_DEVICE_RESET_STATUS_QUERY = 116, + 
PIPE_CAP_INDEP_BLEND_FUNC, + PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS, + PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT, + PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT, + PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER, + PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER, + PIPE_CAP_DEPTH_CLIP_DISABLE, + PIPE_CAP_SHADER_STENCIL_EXPORT, + PIPE_CAP_TGSI_INSTANCEID, + PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR, + PIPE_CAP_FRAGMENT_COLOR_CLAMPED, + PIPE_CAP_MIXED_COLORBUFFER_FORMATS, + PIPE_CAP_SEAMLESS_CUBE_MAP, + PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE, + PIPE_CAP_MIN_TEXEL_OFFSET, + PIPE_CAP_MAX_TEXEL_OFFSET, + PIPE_CAP_CONDITIONAL_RENDER, + PIPE_CAP_TEXTURE_BARRIER, + PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS, + PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS, + PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME, + PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS, + PIPE_CAP_VERTEX_COLOR_UNCLAMPED, + PIPE_CAP_VERTEX_COLOR_CLAMPED, + PIPE_CAP_GLSL_FEATURE_LEVEL, + PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION, + PIPE_CAP_USER_VERTEX_BUFFERS, + PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY, + PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY, + PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY, + PIPE_CAP_COMPUTE, + PIPE_CAP_USER_INDEX_BUFFERS, + PIPE_CAP_USER_CONSTANT_BUFFERS, + PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT, + PIPE_CAP_START_INSTANCE, + PIPE_CAP_QUERY_TIMESTAMP, + PIPE_CAP_TEXTURE_MULTISAMPLE, + PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT, + PIPE_CAP_CUBE_MAP_ARRAY, + PIPE_CAP_TEXTURE_BUFFER_OBJECTS, + PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT, + PIPE_CAP_TGSI_TEXCOORD, + PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER, + PIPE_CAP_QUERY_PIPELINE_STATISTICS, + PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK, + PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE, + PIPE_CAP_MAX_VIEWPORTS, + PIPE_CAP_ENDIANNESS, + PIPE_CAP_MIXED_FRAMEBUFFER_SIZES, + PIPE_CAP_TGSI_VS_LAYER_VIEWPORT, + PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES, + PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS, + PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS, + 
PIPE_CAP_TEXTURE_GATHER_SM5, + PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT, + PIPE_CAP_FAKE_SW_MSAA, + PIPE_CAP_TEXTURE_QUERY_LOD, + PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET, + PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET, + PIPE_CAP_SAMPLE_SHADING, + PIPE_CAP_TEXTURE_GATHER_OFFSETS, + PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION, + PIPE_CAP_MAX_VERTEX_STREAMS, + PIPE_CAP_DRAW_INDIRECT, + PIPE_CAP_TGSI_FS_FINE_DERIVATIVE, + PIPE_CAP_VENDOR_ID, + PIPE_CAP_DEVICE_ID, + PIPE_CAP_ACCELERATED, + PIPE_CAP_VIDEO_MEMORY, + PIPE_CAP_UMA, + PIPE_CAP_CONDITIONAL_RENDER_INVERTED, + PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE, + PIPE_CAP_SAMPLER_VIEW_TARGET, + PIPE_CAP_CLIP_HALFZ, + PIPE_CAP_VERTEXID_NOBASE, + PIPE_CAP_POLYGON_OFFSET_CLAMP, + PIPE_CAP_MULTISAMPLE_Z_RESOLVE, + PIPE_CAP_RESOURCE_FROM_USER_MEMORY, + PIPE_CAP_DEVICE_RESET_STATUS_QUERY, }; #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0) From f4310cdbd08f20276237fbefa3eba406aa109636 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 10 Jun 2015 01:46:13 -0700 Subject: [PATCH 555/834] i965: Re-index SSA definitions before printing NIR code. This makes the SSA definitions use sequential numbers (0, 1, 2, ...) instead of seemingly random ones. There's not much point normally, but it makes debug output much easier to read. Signed-off-by: Kenneth Graunke Reviewed-by: Iago Toral Quiroga Reviewed-by: Connor Abbott --- src/mesa/drivers/dri/i965/brw_nir.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 142162c1f8a..c13708a2f8a 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -167,6 +167,12 @@ brw_create_nir(struct brw_context *brw, nir_validate_shader(nir); if (unlikely(debug_enabled)) { + /* Re-index SSA defs so we print more sensible numbers. 
*/ + nir_foreach_overload(nir, overload) { + if (overload->impl) + nir_index_ssa_defs(overload->impl); + } + fprintf(stderr, "NIR (SSA form) for %s shader:\n", _mesa_shader_stage_to_string(stage)); nir_print_shader(nir, stderr); From 16658f426dbd81fcbc317b21ae9a3f7c9b6448fb Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 9 Jun 2015 09:20:58 -0700 Subject: [PATCH 556/834] Revert "i965: Advertise a line width of 40.0 on Cherryview and Skylake." This reverts commit f3b709c0ac073cd0ec90a3a0d91d1ee94668e043. The "dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_4. interpolation.lines_wide" test appears to be broken on Cherryview when we expose line widths greater than 12.0. I'm not sure why. For now, just go back to the limits we used on older platforms. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90902 Acked-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_context.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index ab047046fdb..f39b3501539 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -428,11 +428,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MinLineWidth = 1.0; ctx->Const.MinLineWidthAA = 1.0; - if (brw->gen >= 9 || brw->is_cherryview) { - ctx->Const.MaxLineWidth = 40.0; - ctx->Const.MaxLineWidthAA = 40.0; - ctx->Const.LineWidthGranularity = 0.125; - } else if (brw->gen >= 6) { + if (brw->gen >= 6) { ctx->Const.MaxLineWidth = 7.375; ctx->Const.MaxLineWidthAA = 7.375; ctx->Const.LineWidthGranularity = 0.125; From 8d3c48eed24f351c86361707978647c78010bb7f Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 10 Jun 2015 14:40:33 +0300 Subject: [PATCH 557/834] i965/fs: Remove one more fixed brw_null_reg() from the visitor. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead use fs_builder::null_reg_f() which has the correct register width. Avoids the assertion failure in fs_builder::emit() hit by the "ES3-CTS.shaders.loops.for_dynamic_iterations.unconditional_break_fragment" GLES3 conformance test introduced by 4af4cfba9ee1014baa4a777660fc9d53d57e4c82. Reported-and-reviewed-by: Tapani Pälli Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7789ca77e57..5563c5aa76c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3234,7 +3234,7 @@ fs_visitor::lower_integer_multiplication() ibld.ADD(dst, low, high); if (inst->conditional_mod) { - fs_reg null(retype(brw_null_reg(), inst->dst.type)); + fs_reg null(retype(ibld.null_reg_f(), inst->dst.type)); set_condmod(inst->conditional_mod, ibld.MOV(null, inst->dst)); } From 0dde821bcc96c579ac1f26e26fc03ca117caa377 Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Fri, 12 Jun 2015 12:13:41 +0100 Subject: [PATCH 558/834] trace: Add missing p_compiler.h include. For boolean. Trivial. 
--- src/gallium/drivers/trace/tr_public.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/trace/tr_public.h b/src/gallium/drivers/trace/tr_public.h index aee4937dd4f..b03133f8d97 100644 --- a/src/gallium/drivers/trace/tr_public.h +++ b/src/gallium/drivers/trace/tr_public.h @@ -28,6 +28,8 @@ #ifndef TR_PUBLIC_H #define TR_PUBLIC_H +#include "pipe/p_compiler.h" + #ifdef __cplusplus extern "C" { #endif From c3036f4bb1c4ad788200afc877d42e63b64f330e Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 11 Jun 2015 11:24:17 +0100 Subject: [PATCH 559/834] egl/haiku: use correct version variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Earlier commit folded the two separate variables into one, but forgot to update the haiku driver. Fixes: 0e4b564ef28(egl: combine VersionMajor and VersionMinor into one variable) Cc: Marek Olšák > Cc: Alexander von Gluck IV Acked-by: Brian Paul Signed-off-by: Emil Velikov --- src/egl/drivers/haiku/egl_haiku.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index 4d9888ded2b..b347eb65283 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -304,8 +304,7 @@ init_haiku(_EGLDriver *drv, _EGLDisplay *dpy) _eglLog(_EGL_DEBUG,"Add configs"); haiku_add_configs_for_visuals(dpy); - dpy->VersionMajor=1; - dpy->VersionMinor=4; + dpy->Version = 14; //dpy->Extensions.KHR_create_context = true; From 0b652fedb5e097bcdea79e3b922e946d143148f6 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 11 Jun 2015 11:31:40 +0100 Subject: [PATCH 560/834] egl/haiku: remove commented out code It serves little to no purpose. As the driver gets updated, one can look at the existing implementation (dri2) for reference rather than letting the commented functions bitrot. 
Cc: Alexander von Gluck IV Acked-by: Brian Paul Signed-off-by: Emil Velikov --- src/egl/drivers/haiku/egl_haiku.cpp | 93 ----------------------------- 1 file changed, 93 deletions(-) diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index b347eb65283..b09239c3c78 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -74,48 +74,6 @@ struct haiku_egl_surface }; -/* -static void -swrastCreateDrawable(struct dri2_egl_display * dri2_dpy, - struct dri2_egl_surface * dri2_surf, int depth) -{ - -} - - -static void -swrastDestroyDrawable(struct dri2_egl_display * dri2_dpy, - struct dri2_egl_surface * dri2_surf) -{ - -} - - -static void -swrastGetDrawableInfo(__DRIdrawable * draw, int *x, int *y, - int *w, int *h, void *loaderPrivate) -{ - -} - - -static void -swrastPutImage(__DRIdrawable * draw, int op, int x, int y, - int w, int h, char *data, void *loaderPrivate) -{ - -} - - -static void -swrastGetImage(__DRIdrawable * read, int x, int y, - int w, int h, char *data, void *loaderPrivate) -{ - -} -*/ - - static void haiku_log(EGLint level, const char *msg) { @@ -263,44 +221,12 @@ EGLBoolean init_haiku(_EGLDriver *drv, _EGLDisplay *dpy) { _eglLog(_EGL_DEBUG,"\nInitializing Haiku EGL\n"); - //_EGLDisplay* egl_dpy; printf("Initializing Haiku EGL\n"); _eglSetLogProc(haiku_log); loader_set_logger(_eglLog); - /*egl_dpy = (_EGLDisplay*) calloc(1, sizeof(_EGLDisplay)); - if (!egl_dpy) - return _eglError(EGL_BAD_ALLOC, "eglInitialize"); - - dpy->DriverData=(void*) egl_dpy; - if (!dpy->PlatformDisplay) { - // OPEN DEVICE - //dri2_dpy->bwindow = (void*)haiku_create_window(); - //dri2_dpy->own_device = true; - } else { - //dri2_dpy->bwindow = (BWindow*)dpy->PlatformDisplay; - }*/ - - //dri2_dpy->driver_name = strdup("swrast"); - //if (!dri2_load_driver_swrast(dpy)) - // goto cleanup_conn; - - /*dri2_dpy->swrast_loader_extension.base.name = __DRI_SWRAST_LOADER; - dri2_dpy->swrast_loader_extension.base.version = 
__DRI_SWRAST_LOADER_VERSION; - dri2_dpy->swrast_loader_extension.getDrawableInfo = swrastGetDrawableInfo; - dri2_dpy->swrast_loader_extension.putImage = swrastPutImage; - dri2_dpy->swrast_loader_extension.getImage = swrastGetImage; - - dri2_dpy->extensions[0] = &dri2_dpy->swrast_loader_extension.base; - dri2_dpy->extensions[1] = NULL; - dri2_dpy->extensions[2] = NULL;*/ - - /*if (dri2_dpy->bwindow) { - if (!dri2_haiku_add_configs_for_visuals(dri2_dpy, dpy)) - goto cleanup_configs; - }*/ _eglLog(_EGL_DEBUG,"Add configs"); haiku_add_configs_for_visuals(dpy); @@ -403,27 +329,8 @@ _eglBuiltInDriverHaiku(const char *args) driver->base.API.CreatePixmapSurface = haiku_create_pixmap_surface; driver->base.API.CreatePbufferSurface = haiku_create_pbuffer_surface; driver->base.API.DestroySurface = haiku_destroy_surface; - /* - driver->API.GetProcAddress = dri2_get_proc_address; - driver->API.WaitClient = dri2_wait_client; - driver->API.WaitNative = dri2_wait_native; - driver->API.BindTexImage = dri2_bind_tex_image; - driver->API.ReleaseTexImage = dri2_release_tex_image; - driver->API.SwapInterval = dri2_swap_interval; - */ driver->base.API.SwapBuffers = haiku_swap_buffers; - /* - driver->API.SwapBuffersWithDamageEXT = dri2_swap_buffers_with_damage; - driver->API.SwapBuffersRegionNOK = dri2_swap_buffers_region; - driver->API.PostSubBufferNV = dri2_post_sub_buffer; - driver->API.CopyBuffers = dri2_copy_buffers, - driver->API.QueryBufferAge = dri2_query_buffer_age; - driver->API.CreateImageKHR = dri2_create_image; - driver->API.DestroyImageKHR = dri2_destroy_image_khr; - driver->API.CreateWaylandBufferFromImageWL = dri2_create_wayland_buffer_from_image; - driver->API.GetSyncValuesCHROMIUM = dri2_get_sync_values_chromium; - */ driver->base.Name = "Haiku"; driver->base.Unload = haiku_unload; From ed9dcdf927b9badd1325130b6b88ad26b04d2ec1 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 11 Jun 2015 12:02:45 +0100 Subject: [PATCH 561/834] egl/haiku: use CALL/TRACE/ERROR over 
_eglLog() for haiku specifics Cc: Alexander von Gluck IV Acked-by: Brian Paul Signed-off-by: Emil Velikov --- src/egl/drivers/haiku/egl_haiku.cpp | 64 ++++++++++++++++++----------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index b09239c3c78..36e1277a2e0 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -44,6 +44,15 @@ #define CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T)) +#ifdef DEBUG +# define TRACE(x...) printf("egl_haiku: " x) +# define CALLED() TRACE("CALLED: %s\n", __PRETTY_FUNCTION__) +#else +# define TRACE(x...) +# define CALLED() +#endif +#define ERROR(x...) printf("egl_haiku: " x) + _EGL_DRIVER_STANDARD_TYPECASTS(haiku_egl) @@ -114,23 +123,25 @@ static _EGLSurface * haiku_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, void *native_window, const EGLint *attrib_list) { + CALLED(); + struct haiku_egl_surface* surface; surface = (struct haiku_egl_surface*)calloc(1,sizeof (*surface)); _eglInitSurface(&surface->surf, disp, EGL_WINDOW_BIT, conf, attrib_list); (&surface->surf)->SwapInterval = 1; - _eglLog(_EGL_DEBUG, "Creating window"); + TRACE("Creating window\n"); BWindow* win = (BWindow*)native_window; - _eglLog(_EGL_DEBUG, "Creating GL view"); + TRACE("Creating GL view\n"); surface->gl = new BGLView(win->Bounds(), "OpenGL", B_FOLLOW_ALL_SIDES, 0, BGL_RGB | BGL_DOUBLE | BGL_ALPHA); - _eglLog(_EGL_DEBUG, "Adding GL"); + TRACE("Adding GL\n"); win->AddChild(surface->gl); - _eglLog(_EGL_DEBUG, "Showing window"); + TRACE("Showing window\n"); win->Show(); return &surface->surf; } @@ -162,13 +173,14 @@ haiku_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) static EGLBoolean haiku_add_configs_for_visuals(_EGLDisplay *dpy) { - printf("Adding configs\n"); + CALLED(); struct haiku_egl_config* conf; conf = CALLOC_STRUCT(haiku_egl_config); _eglInitConfig(&conf->base, dpy, 1); 
- _eglLog(_EGL_DEBUG,"Config inited\n"); + TRACE("Config inited\n"); + _eglSetConfigKey(&conf->base, EGL_RED_SIZE, 8); _eglSetConfigKey(&conf->base, EGL_BLUE_SIZE, 8); _eglSetConfigKey(&conf->base, EGL_GREEN_SIZE, 8); @@ -199,19 +211,19 @@ haiku_add_configs_for_visuals(_EGLDisplay *dpy) _eglSetConfigKey(&conf->base, EGL_MAX_PBUFFER_PIXELS, 0); // TODO: How to get the right value ? _eglSetConfigKey(&conf->base, EGL_SURFACE_TYPE, EGL_WINDOW_BIT /*| EGL_PIXMAP_BIT | EGL_PBUFFER_BIT*/); - printf("Config configuated\n"); + TRACE("Config configuated\n"); if (!_eglValidateConfig(&conf->base, EGL_FALSE)) { - _eglLog(_EGL_DEBUG, "Haiku failed to validate config"); + _eglLog(_EGL_DEBUG, "Haiku: failed to validate config"); return EGL_FALSE; } - printf("Validated config\n"); + TRACE("Validated config\n"); _eglLinkConfig(&conf->base); if (!_eglGetArraySize(dpy->Configs)) { _eglLog(_EGL_WARNING, "Haiku: failed to create any config"); return EGL_FALSE; } - printf("Config successful!\n"); + TRACE("Config successfull\n"); return EGL_TRUE; } @@ -220,22 +232,18 @@ extern "C" EGLBoolean init_haiku(_EGLDriver *drv, _EGLDisplay *dpy) { - _eglLog(_EGL_DEBUG,"\nInitializing Haiku EGL\n"); + CALLED(); - printf("Initializing Haiku EGL\n"); _eglSetLogProc(haiku_log); loader_set_logger(_eglLog); - _eglLog(_EGL_DEBUG,"Add configs"); - haiku_add_configs_for_visuals(dpy); + TRACE("Add configs\n"); + haiku_add_configs_for_visuals(dpy); dpy->Version = 14; - //dpy->Extensions.KHR_create_context = true; - - //dri2_dpy->vtbl = &dri2_haiku_display_vtbl; - _eglLog(_EGL_DEBUG, "Initialization finished"); + TRACE("Initialization finished\n"); return EGL_TRUE; } @@ -254,12 +262,15 @@ _EGLContext* haiku_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, _EGLContext *share_list, const EGLint *attrib_list) { - _eglLog(_EGL_DEBUG,"Creating context"); + CALLED(); + struct haiku_egl_context* context; context=(struct haiku_egl_context*)calloc(1,sizeof (*context)); - 
if(!_eglInitContext(&context->ctx, disp, conf, attrib_list)) - printf("ERROR creating context"); - _eglLog(_EGL_DEBUG, "Context created"); + + if (!_eglInitContext(&context->ctx, disp, conf, attrib_list)) + ERROR("ERROR creating context"); + + TRACE("Context created\n"); return &context->ctx; } @@ -278,6 +289,8 @@ EGLBoolean haiku_make_current(_EGLDriver* drv, _EGLDisplay* dpy, _EGLSurface *dsurf, _EGLSurface *rsurf, _EGLContext *ctx) { + CALLED(); + struct haiku_egl_context* cont=haiku_egl_context(ctx); struct haiku_egl_surface* surf=haiku_egl_surface(dsurf); _EGLContext *old_ctx; @@ -316,7 +329,8 @@ extern "C" _EGLDriver* _eglBuiltInDriverHaiku(const char *args) { - _eglLog(_EGL_DEBUG,"Driver loaded"); + CALLED(); + struct haiku_egl_driver* driver; driver=(struct haiku_egl_driver*)calloc(1,sizeof(*driver)); _eglInitDriverFallbacks(&driver->base); @@ -335,7 +349,7 @@ _eglBuiltInDriverHaiku(const char *args) driver->base.Name = "Haiku"; driver->base.Unload = haiku_unload; - _eglLog(_EGL_DEBUG, "API Calls defined"); - + TRACE("API Calls defined\n"); + return &driver->base; } From 46f87b2c19dc0a326c963c652b174384d59e3943 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 11 Jun 2015 12:17:23 +0100 Subject: [PATCH 562/834] egl/haiku: handle memory allocation failure Cc: Alexander von Gluck IV Acked-by: Brian Paul Signed-off-by: Emil Velikov --- src/egl/drivers/haiku/egl_haiku.cpp | 35 +++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index 36e1277a2e0..760ee453f6d 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -42,8 +42,6 @@ #include -#define CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T)) - #ifdef DEBUG # define TRACE(x...) 
printf("egl_haiku: " x) # define CALLED() TRACE("CALLED: %s\n", __PRETTY_FUNCTION__) @@ -126,9 +124,15 @@ haiku_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, CALLED(); struct haiku_egl_surface* surface; - surface = (struct haiku_egl_surface*)calloc(1,sizeof (*surface)); + surface = (struct haiku_egl_surface*) calloc(1, sizeof (*surface)); + if (!surface) { + _eglError(EGL_BAD_ALLOC, "haiku_create_window_surface"); + return NULL; + } + + if (!_eglInitSurface(&surface->surf, disp, EGL_WINDOW_BIT, conf, attrib_list)) + goto cleanup_surface; - _eglInitSurface(&surface->surf, disp, EGL_WINDOW_BIT, conf, attrib_list); (&surface->surf)->SwapInterval = 1; TRACE("Creating window\n"); @@ -144,6 +148,10 @@ haiku_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, TRACE("Showing window\n"); win->Show(); return &surface->surf; + +cleanup_surface: + free(surface); + return NULL; } @@ -176,7 +184,11 @@ haiku_add_configs_for_visuals(_EGLDisplay *dpy) CALLED(); struct haiku_egl_config* conf; - conf = CALLOC_STRUCT(haiku_egl_config); + conf = (struct haiku_egl_config*) calloc(1, sizeof (*conf)); + if (!conf) { + _eglError(EGL_BAD_ALLOC, "haiku_add_configs_for_visuals"); + return NULL; + } _eglInitConfig(&conf->base, dpy, 1); TRACE("Config inited\n"); @@ -265,7 +277,11 @@ haiku_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, CALLED(); struct haiku_egl_context* context; - context=(struct haiku_egl_context*)calloc(1,sizeof (*context)); + context = (struct haiku_egl_context*) calloc(1, sizeof (*context)); + if (!context) { + _eglError(EGL_BAD_ALLOC, "haiku_create_context"); + return NULL; + } if (!_eglInitContext(&context->ctx, disp, conf, attrib_list)) ERROR("ERROR creating context"); @@ -332,7 +348,12 @@ _eglBuiltInDriverHaiku(const char *args) CALLED(); struct haiku_egl_driver* driver; - driver=(struct haiku_egl_driver*)calloc(1,sizeof(*driver)); + driver = (struct haiku_egl_driver*) calloc(1, sizeof(*driver)); + if (!driver) { + 
_eglError(EGL_BAD_ALLOC, "_eglBuiltInDriverHaiku"); + return NULL; + } + _eglInitDriverFallbacks(&driver->base); driver->base.API.Initialize = init_haiku; driver->base.API.Terminate = haiku_terminate; From d0af2833039dca2963f3ddf241e3084e4bf7e840 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 11 Jun 2015 12:18:35 +0100 Subject: [PATCH 563/834] egl/haiku: remove unused variables in struct haiku_egl_driver Cc: Alexander von Gluck IV Acked-by: Brian Paul Signed-off-by: Emil Velikov --- src/egl/drivers/haiku/egl_haiku.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index 760ee453f6d..cfe8817fbac 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -58,10 +58,6 @@ _EGL_DRIVER_STANDARD_TYPECASTS(haiku_egl) struct haiku_egl_driver { _EGLDriver base; - - void *handle; - _EGLProc (*get_proc_address)(const char *procname); - void (*glFlush)(void); }; struct haiku_egl_config From 667fe2f5e9508a9591eeabdd7a01596006d87e5f Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 11 Jun 2015 12:20:34 +0100 Subject: [PATCH 564/834] egl/haiku: we don't use src/loader, drop all the references to it Cc: Alexander von Gluck IV Acked-by: Brian Paul Signed-off-by: Emil Velikov --- src/egl/drivers/haiku/SConscript | 5 ----- src/egl/drivers/haiku/egl_haiku.cpp | 3 --- 2 files changed, 8 deletions(-) diff --git a/src/egl/drivers/haiku/SConscript b/src/egl/drivers/haiku/SConscript index 9dd2f70f4ac..ec6020ece77 100644 --- a/src/egl/drivers/haiku/SConscript +++ b/src/egl/drivers/haiku/SConscript @@ -9,7 +9,6 @@ env.Append(CPPDEFINES = [ env.Append(CPPPATH = [ '#/include', '#/src/egl/main', - '#/src/loader', ]) sources = [ @@ -22,10 +21,6 @@ if env['platform'] == 'haiku': '_EGL_NATIVE_PLATFORM=haiku', ]) -env.Prepend(LIBS = [ - libloader, -]) - egl_haiku = env.ConvenienceLibrary( target = 'egl_haiku', source = sources, diff --git 
a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index cfe8817fbac..056fafc729d 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -27,7 +27,6 @@ #include #include -#include "loader.h" #include "eglconfig.h" #include "eglcontext.h" #include "egldisplay.h" @@ -244,8 +243,6 @@ init_haiku(_EGLDriver *drv, _EGLDisplay *dpy) _eglSetLogProc(haiku_log); - loader_set_logger(_eglLog); - TRACE("Add configs\n"); haiku_add_configs_for_visuals(dpy); From d38a80ba6c75b8f594a4ff88e59ede254075a859 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 11 Jun 2015 12:22:28 +0100 Subject: [PATCH 565/834] egl/haiku: kill off haiku_log() It's an incomplete copy of the default _eglLog() implementation. Just use the default logger. Cc: Alexander von Gluck IV Acked-by: Brian Paul Signed-off-by: Emil Velikov --- src/egl/drivers/haiku/egl_haiku.cpp | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index 056fafc729d..2933acc95cc 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -76,28 +76,6 @@ struct haiku_egl_surface }; -static void -haiku_log(EGLint level, const char *msg) -{ - switch (level) { - case _EGL_DEBUG: - fprintf(stderr,"%s", msg); - break; - case _EGL_INFO: - fprintf(stderr,"%s", msg); - break; - case _EGL_WARNING: - fprintf(stderr,"%s", msg); - break; - case _EGL_FATAL: - fprintf(stderr,"%s", msg); - break; - default: - break; - } -} - - /** * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface(). 
*/ @@ -241,8 +219,6 @@ init_haiku(_EGLDriver *drv, _EGLDisplay *dpy) { CALLED(); - _eglSetLogProc(haiku_log); - TRACE("Add configs\n"); haiku_add_configs_for_visuals(dpy); From e77a32fcaed30815d0f95e0d05432e8637ab0f3e Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 11 Jun 2015 12:33:55 +0100 Subject: [PATCH 566/834] egl/haiku: minor surface management cleanups Drop the stub/unused function haiku_create_surface() and add some basic implementation for destroy_surface() Cc: Alexander von Gluck IV Acked-by: Brian Paul Signed-off-by: Emil Velikov --- src/egl/drivers/haiku/egl_haiku.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index 2933acc95cc..154b3af7e07 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -76,17 +76,6 @@ struct haiku_egl_surface }; -/** - * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface(). - */ -static _EGLSurface * -haiku_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, - _EGLConfig *conf, void *native_surface, const EGLint *attrib_list) -{ - return NULL; -} - - /** * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface(). 
*/ @@ -147,6 +136,10 @@ haiku_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *disp, static EGLBoolean haiku_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) { + if (_eglPutSurface(surf)) { + // XXX: detach haiku_egl_surface::gl from the native window and destroy it + free(surf); + } return EGL_TRUE; } From b0f33e9736116a1a6a7bd8bade51d473d7373daa Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 11 Jun 2015 13:07:08 +0100 Subject: [PATCH 567/834] egl/haiku: plug some obvious memory leaks Cc: Alexander von Gluck IV Acked-by: Brian Paul Signed-off-by: Emil Velikov --- src/egl/drivers/haiku/egl_haiku.cpp | 32 ++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index 154b3af7e07..da72895ac04 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -192,18 +192,22 @@ haiku_add_configs_for_visuals(_EGLDisplay *dpy) TRACE("Config configuated\n"); if (!_eglValidateConfig(&conf->base, EGL_FALSE)) { _eglLog(_EGL_DEBUG, "Haiku: failed to validate config"); - return EGL_FALSE; + goto cleanup; } TRACE("Validated config\n"); _eglLinkConfig(&conf->base); if (!_eglGetArraySize(dpy->Configs)) { _eglLog(_EGL_WARNING, "Haiku: failed to create any config"); - return EGL_FALSE; + goto cleanup; } TRACE("Config successfull\n"); - + return EGL_TRUE; + +cleanup: + free(conf); + return EGL_FALSE; } extern "C" @@ -213,7 +217,8 @@ init_haiku(_EGLDriver *drv, _EGLDisplay *dpy) CALLED(); TRACE("Add configs\n"); - haiku_add_configs_for_visuals(dpy); + if (!haiku_add_configs_for_visuals(dpy)) + return EGL_FALSE; dpy->Version = 14; @@ -246,10 +251,14 @@ haiku_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, } if (!_eglInitContext(&context->ctx, disp, conf, attrib_list)) - ERROR("ERROR creating context"); + goto cleanup; TRACE("Context created\n"); return &context->ctx; + +cleanup: + free(context); 
+ return NULL; } @@ -257,7 +266,13 @@ extern "C" EGLBoolean haiku_destroy_context(_EGLDriver* drv, _EGLDisplay *disp, _EGLContext* ctx) { - ctx=NULL; + struct haiku_egl_context* context = haiku_egl_context(ctx); + + if (_eglPutContext(ctx)) { + // XXX: teardown the context ? + free(context); + ctx = NULL + } return EGL_TRUE; } @@ -273,7 +288,10 @@ haiku_make_current(_EGLDriver* drv, _EGLDisplay* dpy, _EGLSurface *dsurf, struct haiku_egl_surface* surf=haiku_egl_surface(dsurf); _EGLContext *old_ctx; _EGLSurface *old_dsurf, *old_rsurf; - _eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf); + + if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf)) + return EGL_FALSE; + //cont->ctx.DrawSurface=&surf->surf; surf->gl->LockGL(); return EGL_TRUE; From 0e55db3b8a9a360511d8679953b8e4b890d66ed7 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 11 Jun 2015 13:08:00 +0100 Subject: [PATCH 568/834] egl/haiku: coding style fixes Cc: Alexander von Gluck IV Acked-by: Brian Paul Signed-off-by: Emil Velikov --- src/egl/drivers/haiku/egl_haiku.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index da72895ac04..3d00e47c8e6 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -284,10 +284,10 @@ haiku_make_current(_EGLDriver* drv, _EGLDisplay* dpy, _EGLSurface *dsurf, { CALLED(); - struct haiku_egl_context* cont=haiku_egl_context(ctx); - struct haiku_egl_surface* surf=haiku_egl_surface(dsurf); + struct haiku_egl_context* cont = haiku_egl_context(ctx); + struct haiku_egl_surface* surf = haiku_egl_surface(dsurf); _EGLContext *old_ctx; - _EGLSurface *old_dsurf, *old_rsurf; + _EGLSurface *old_dsurf, *old_rsurf; if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf)) return EGL_FALSE; @@ -302,7 +302,8 @@ extern "C" EGLBoolean haiku_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface 
*surf) { - struct haiku_egl_surface* surface=haiku_egl_surface(surf); + struct haiku_egl_surface* surface = haiku_egl_surface(surf); + surface->gl->SwapBuffers(); //gl->Render(); return EGL_TRUE; From 3f5dc9b94fc47f25821cec0a052df3d8f4cb5a1f Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 15 Apr 2015 11:28:38 +0100 Subject: [PATCH 569/834] freedreno: use CXX linker rather than explicit link against libstdc++ Cc: Rob Clark Cc: "10.6" Signed-off-by: Emil Velikov --- src/gallium/drivers/freedreno/Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am index 4b2629f77bd..e798e449b99 100644 --- a/src/gallium/drivers/freedreno/Makefile.am +++ b/src/gallium/drivers/freedreno/Makefile.am @@ -21,6 +21,8 @@ libfreedreno_la_SOURCES = \ noinst_PROGRAMS = ir3_compiler +# XXX: Required due to the C++ sources in libnir/libglsl_util +nodist_EXTRA_ir3_compiler_SOURCES = dummy.cpp ir3_compiler_SOURCES = \ ir3/ir3_cmdline.c @@ -29,7 +31,6 @@ ir3_compiler_LDADD = \ ../../auxiliary/libgallium.la \ $(top_builddir)/src/glsl/libnir.la \ $(top_builddir)/src/libglsl_util.la \ - -lstdc++ \ $(top_builddir)/src/util/libmesautil.la \ $(GALLIUM_COMMON_LIB_DEPS) \ $(FREEDRENO_LIBS) From 4722743f4b920c6986a7148ef3ce76b4fd12db46 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 15 Apr 2015 11:42:55 +0100 Subject: [PATCH 570/834] gallium: use $(top_builddir) when referencing static archives Just like every other place in gallium. 
Signed-off-by: Emil Velikov --- src/gallium/drivers/freedreno/Makefile.am | 2 +- src/gallium/drivers/nouveau/Makefile.am | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am index e798e449b99..cbf62c6daae 100644 --- a/src/gallium/drivers/freedreno/Makefile.am +++ b/src/gallium/drivers/freedreno/Makefile.am @@ -28,7 +28,7 @@ ir3_compiler_SOURCES = \ ir3_compiler_LDADD = \ libfreedreno.la \ - ../../auxiliary/libgallium.la \ + $(top_builddir)/src/gallium/auxiliary/libgallium.la \ $(top_builddir)/src/glsl/libnir.la \ $(top_builddir)/src/libglsl_util.la \ $(top_builddir)/src/util/libmesautil.la \ diff --git a/src/gallium/drivers/nouveau/Makefile.am b/src/gallium/drivers/nouveau/Makefile.am index 0aefc031210..d05f0a17ab4 100644 --- a/src/gallium/drivers/nouveau/Makefile.am +++ b/src/gallium/drivers/nouveau/Makefile.am @@ -48,7 +48,7 @@ nouveau_compiler_SOURCES = \ nouveau_compiler_LDADD = \ libnouveau.la \ - ../../auxiliary/libgallium.la \ + $(top_builddir)/src/gallium/auxiliary/libgallium.la \ $(top_builddir)/src/util/libmesautil.la \ $(GALLIUM_COMMON_LIB_DEPS) From 1df5a6c71ee4a3c08b5da3f8bae24880af16b74c Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 15 Apr 2015 13:40:55 +0100 Subject: [PATCH 571/834] mesa; add a dummy _mesa_error_no_memory() symbol to libglsl_util Rather than forcing everyone to provide their own definition of the symbol provide a common (dummy) one. This helps us resolve the build of the standalone pipe-drivers (amongst others), which are missing the symbol. 
Cc: Rob Clark Cc: "10.6" Signed-off-by: Emil Velikov --- src/Makefile.am | 3 ++- src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 8 -------- src/gallium/state_trackers/xa/xa_tracker.c | 12 ------------ src/glsl/Makefile.am | 13 ++++--------- src/glsl/SConscript | 4 +++- src/glsl/main.cpp | 6 ------ src/mesa/Android.libmesa_glsl_utils.mk | 6 ++++-- .../tests/common.c => mesa/program/dummy_errors.c} | 0 8 files changed, 13 insertions(+), 39 deletions(-) rename src/{glsl/tests/common.c => mesa/program/dummy_errors.c} (100%) diff --git a/src/Makefile.am b/src/Makefile.am index 18cb4ce76d7..5d69abd996d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -72,4 +72,5 @@ noinst_LTLIBRARIES = libglsl_util.la libglsl_util_la_SOURCES = \ mesa/main/imports.c \ mesa/program/prog_hash_table.c \ - mesa/program/symbol_table.c + mesa/program/symbol_table.c \ + mesa/program/dummy_errors.c diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index d0517aab8ce..0b16cc1eb54 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -360,11 +360,3 @@ int main(int argc, char **argv) } dump_info(&v, info); } - -void _mesa_error_no_memory(const char *caller); - -void -_mesa_error_no_memory(const char *caller) -{ - fprintf(stderr, "Mesa error: out of memory in %s", caller); -} diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c index 89019988d57..f69ac8edf27 100644 --- a/src/gallium/state_trackers/xa/xa_tracker.c +++ b/src/gallium/state_trackers/xa/xa_tracker.c @@ -535,15 +535,3 @@ xa_surface_format(const struct xa_surface *srf) { return srf->fdesc.xa_format; } - -/* - * _mesa_error_no_memory() is expected by NIR to be provided by the - * user. Normally this is in mesa st, but other state trackers - * must provide their own. 
- */ -void _mesa_error_no_memory(const char *caller); -void -_mesa_error_no_memory(const char *caller) -{ - debug_printf("Mesa error: out of memory in %s", caller); -} diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am index 1b7b1f8c691..fa8c9f5d3ca 100644 --- a/src/glsl/Makefile.am +++ b/src/glsl/Makefile.am @@ -89,8 +89,7 @@ tests_general_ir_test_SOURCES = \ tests/builtin_variable_test.cpp \ tests/invalidate_locations_test.cpp \ tests/general_ir_test.cpp \ - tests/varyings_test.cpp \ - tests/common.c + tests/varyings_test.cpp tests_general_ir_test_CFLAGS = \ $(PTHREAD_CFLAGS) tests_general_ir_test_LDADD = \ @@ -103,8 +102,7 @@ tests_uniform_initializer_test_SOURCES = \ tests/copy_constant_to_storage_tests.cpp \ tests/set_uniform_initializer_tests.cpp \ tests/uniform_initializer_utils.cpp \ - tests/uniform_initializer_utils.h \ - tests/common.c + tests/uniform_initializer_utils.h tests_uniform_initializer_test_CFLAGS = \ $(PTHREAD_CFLAGS) tests_uniform_initializer_test_LDADD = \ @@ -114,8 +112,7 @@ tests_uniform_initializer_test_LDADD = \ $(PTHREAD_LIBS) tests_sampler_types_test_SOURCES = \ - tests/sampler_types_test.cpp \ - tests/common.c + tests/sampler_types_test.cpp tests_sampler_types_test_CFLAGS = \ $(PTHREAD_CFLAGS) tests_sampler_types_test_LDADD = \ @@ -133,8 +130,7 @@ libglcpp_la_SOURCES = \ $(LIBGLCPP_FILES) glcpp_glcpp_SOURCES = \ - glcpp/glcpp.c \ - tests/common.c + glcpp/glcpp.c glcpp_glcpp_LDADD = \ libglcpp.la \ $(top_builddir)/src/libglsl_util.la \ @@ -164,7 +160,6 @@ glsl_compiler_LDADD = \ glsl_test_SOURCES = \ standalone_scaffolding.cpp \ - tests/common.c \ test.cpp \ test_optpass.cpp \ test_optpass.h diff --git a/src/glsl/SConscript b/src/glsl/SConscript index 284b375844f..89c603580a5 100644 --- a/src/glsl/SConscript +++ b/src/glsl/SConscript @@ -71,6 +71,7 @@ env.Command('imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', '$SOURCE')) env.Prepend(CPPPATH = ['#src/mesa/program']) env.Command('prog_hash_table.c', 
'#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE')) env.Command('symbol_table.c', '#src/mesa/program/symbol_table.c', Copy('$TARGET', '$SOURCE')) +env.Command('dummy_errors.c', '#src/mesa/program/dummy_errors.c', Copy('$TARGET', '$SOURCE')) compiler_objs = env.StaticObject(source_lists['GLSL_COMPILER_CXX_FILES']) @@ -78,6 +79,7 @@ mesa_objs = env.StaticObject([ 'imports.c', 'prog_hash_table.c', 'symbol_table.c', + 'dummy_errors.c', ]) compiler_objs += mesa_objs @@ -115,6 +117,6 @@ env.Alias('glsl_compiler', glsl_compiler) glcpp = env.Program( target = 'glcpp/glcpp', - source = ['glcpp/glcpp.c', 'tests/common.c'] + mesa_objs, + source = ['glcpp/glcpp.c'] + mesa_objs, ) env.Alias('glcpp', glcpp) diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index 4b39c9e54c2..23412980dce 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -41,12 +41,6 @@ static int glsl_version = 330; -extern "C" void -_mesa_error_no_memory(const char *caller) -{ - fprintf(stderr, "Mesa error: out of memory in %s", caller); -} - static void initialize_context(struct gl_context *ctx, gl_api api) { diff --git a/src/mesa/Android.libmesa_glsl_utils.mk b/src/mesa/Android.libmesa_glsl_utils.mk index 3497377af8c..ed620ac648c 100644 --- a/src/mesa/Android.libmesa_glsl_utils.mk +++ b/src/mesa/Android.libmesa_glsl_utils.mk @@ -44,7 +44,8 @@ LOCAL_C_INCLUDES := \ LOCAL_SRC_FILES := \ main/imports.c \ program/prog_hash_table.c \ - program/symbol_table.c + program/symbol_table.c \ + program/dummy_errors.c include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -68,7 +69,8 @@ LOCAL_C_INCLUDES := \ LOCAL_SRC_FILES := \ main/imports.c \ program/prog_hash_table.c \ - program/symbol_table.c + program/symbol_table.c \ + program/dummy_errors.c include $(MESA_COMMON_MK) include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/src/glsl/tests/common.c b/src/mesa/program/dummy_errors.c similarity index 100% rename from src/glsl/tests/common.c rename to src/mesa/program/dummy_errors.c From 
ba512cc7fa5db0aeeb2fc0708920914cd3a5bf95 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 15 Apr 2015 12:46:30 +0100 Subject: [PATCH 572/834] pipe-loader: add libnir and libglsl_util to the link Based on commit 101142c4010(xa: support for drivers which use NIR) Cc: Rob Clark Cc: "10.6" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90466 Signed-off-by: Emil Velikov --- src/gallium/targets/pipe-loader/Makefile.am | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/targets/pipe-loader/Makefile.am b/src/gallium/targets/pipe-loader/Makefile.am index 967cdb76542..e4048b58605 100644 --- a/src/gallium/targets/pipe-loader/Makefile.am +++ b/src/gallium/targets/pipe-loader/Makefile.am @@ -52,6 +52,8 @@ endif PIPE_LIBS += \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(top_builddir)/src/glsl/libnir.la \ + $(top_builddir)/src/libglsl_util.la \ $(top_builddir)/src/util/libmesautil.la \ $(top_builddir)/src/gallium/drivers/rbug/librbug.la \ $(top_builddir)/src/gallium/drivers/trace/libtrace.la \ From 83b5648a1e0b7c21536af18c0d29da2f2a31215e Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 15 Apr 2015 14:34:00 +0100 Subject: [PATCH 573/834] targets/nine: link against libnir/libglsl_util Based on commit 101142c4010(xa: support for drivers which use NIR) Cc: "10.6" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90466 Signed-off-by: Emil Velikov --- src/gallium/targets/d3dadapter9/Makefile.am | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am index 1dc55f5c1cd..591978f1f61 100644 --- a/src/gallium/targets/d3dadapter9/Makefile.am +++ b/src/gallium/targets/d3dadapter9/Makefile.am @@ -74,6 +74,8 @@ endif # HAVE_LD_VERSION_SCRIPT d3dadapter9_la_LIBADD = \ $(top_builddir)/src/gallium/auxiliary/libgalliumvl_stub.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(top_builddir)/src/glsl/libnir.la \ + $(top_builddir)/src/libglsl_util.la \ 
$(top_builddir)/src/gallium/state_trackers/nine/libninetracker.la \ $(top_builddir)/src/util/libmesautil.la \ $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ From 634f2002563b4fca68490c0a39518ea838f28fb1 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Wed, 10 Jun 2015 23:35:04 +0100 Subject: [PATCH 574/834] mesa: build xmlconfig to a separate static library As we use the file from both the dri modules and loader, we end up with multiple definition of the symbols provided in our gallium dri modules. Additionally we compile the file twice. Resolve both issues, effectively enabling the build on toolchains which don't support -Wl,--allow-multiple-definition. v2: [Emil Velikov] - Fix the Scons/Android build. - Resolve libgbm build issues (bring back the missing -lm) Cc: Julien Isorce Cc: "10.5 10.6" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90310 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90905 Acked-by: Matt Turner Signed-off-by: Emil Velikov --- src/gallium/targets/dri/Makefile.am | 6 ------ src/loader/Makefile.am | 10 +++------- src/mesa/drivers/dri/Makefile.am | 1 + src/mesa/drivers/dri/common/Android.mk | 4 +++- src/mesa/drivers/dri/common/Makefile.am | 6 +++++- src/mesa/drivers/dri/common/Makefile.sources | 4 +++- src/mesa/drivers/dri/common/SConscript | 2 +- src/mesa/drivers/dri/i965/Makefile.am | 1 + 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am index f9e4ada9338..96483964589 100644 --- a/src/gallium/targets/dri/Makefile.am +++ b/src/gallium/targets/dri/Makefile.am @@ -53,12 +53,6 @@ gallium_dri_la_LIBADD = \ $(LIBDRM_LIBS) \ $(GALLIUM_COMMON_LIB_DEPS) -# XXX: Temporary allow duplicated symbols, as the loader pulls in xmlconfig.c -# which already provides driParse* and driQuery* amongst others. -# Remove this hack as we come up with a cleaner solution. 
-gallium_dri_la_LDFLAGS += \ - -Wl,--allow-multiple-definition - EXTRA_gallium_dri_la_DEPENDENCIES = \ dri.sym \ $(top_srcdir)/src/gallium/targets/dri-vdpau.dyn diff --git a/src/loader/Makefile.am b/src/loader/Makefile.am index 36ddba82bd3..aef1bd61bea 100644 --- a/src/loader/Makefile.am +++ b/src/loader/Makefile.am @@ -41,15 +41,11 @@ libloader_la_CPPFLAGS += \ -I$(top_builddir)/src/mesa/drivers/dri/common/ \ -I$(top_srcdir)/src/mesa/ \ -I$(top_srcdir)/src/mapi/ \ - -DUSE_DRICONF \ - $(EXPAT_CFLAGS) + -DUSE_DRICONF -libloader_la_SOURCES += \ - $(top_srcdir)/src/mesa/drivers/dri/common/xmlconfig.c + libloader_la_LIBADD += \ + $(top_builddir)/src/mesa/drivers/dri/common/libxmlconfig.la -libloader_la_LIBADD += \ - -lm \ - $(EXPAT_LIBS) endif if !HAVE_LIBDRM diff --git a/src/mesa/drivers/dri/Makefile.am b/src/mesa/drivers/dri/Makefile.am index fa1de103b56..08a8e645521 100644 --- a/src/mesa/drivers/dri/Makefile.am +++ b/src/mesa/drivers/dri/Makefile.am @@ -60,6 +60,7 @@ mesa_dri_drivers_la_LIBADD = \ ../../libmesa.la \ common/libmegadriver_stub.la \ common/libdricommon.la \ + common/libxmlconfig.la \ $(MEGADRIVERS_DEPS) \ $(DRI_LIB_DEPS) \ $() diff --git a/src/mesa/drivers/dri/common/Android.mk b/src/mesa/drivers/dri/common/Android.mk index c003c942fca..6986f5e8cb4 100644 --- a/src/mesa/drivers/dri/common/Android.mk +++ b/src/mesa/drivers/dri/common/Android.mk @@ -50,7 +50,9 @@ else LOCAL_SHARED_LIBRARIES := libdrm endif -LOCAL_SRC_FILES := $(DRI_COMMON_FILES) +LOCAL_SRC_FILES := \ + $(DRI_COMMON_FILES) \ + $(XMLCONFIG_FILES) MESA_DRI_OPTIONS_H := $(intermediates)/xmlpool/options.h LOCAL_GENERATED_SOURCES := $(MESA_DRI_OPTIONS_H) diff --git a/src/mesa/drivers/dri/common/Makefile.am b/src/mesa/drivers/dri/common/Makefile.am index da8f97a980e..ae19fcb3565 100644 --- a/src/mesa/drivers/dri/common/Makefile.am +++ b/src/mesa/drivers/dri/common/Makefile.am @@ -33,16 +33,20 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/gallium/include \ -I$(top_srcdir)/src/gallium/auxiliary \ 
$(DEFINES) \ - $(EXPAT_CFLAGS) \ $(VISIBILITY_CFLAGS) noinst_LTLIBRARIES = \ libdricommon.la \ + libxmlconfig.la \ libmegadriver_stub.la \ libdri_test_stubs.la libdricommon_la_SOURCES = $(DRI_COMMON_FILES) +libxmlconfig_la_SOURCES = $(XMLCONFIG_FILES) +libxmlconfig_la_CFLAGS = $(AM_CFLAGS) $(EXPAT_CFLAGS) +libxmlconfig_la_LIBADD = $(EXPAT_LIBS) -lm + libdri_test_stubs_la_SOURCES = $(test_stubs_FILES) libdri_test_stubs_la_CFLAGS = $(AM_CFLAGS) -DNO_MAIN diff --git a/src/mesa/drivers/dri/common/Makefile.sources b/src/mesa/drivers/dri/common/Makefile.sources index d00ec5f7334..d5d8da8fcee 100644 --- a/src/mesa/drivers/dri/common/Makefile.sources +++ b/src/mesa/drivers/dri/common/Makefile.sources @@ -2,7 +2,9 @@ DRI_COMMON_FILES := \ utils.c \ utils.h \ dri_util.c \ - dri_util.h \ + dri_util.h + +XMLCONFIG_FILES := \ xmlconfig.c \ xmlconfig.h diff --git a/src/mesa/drivers/dri/common/SConscript b/src/mesa/drivers/dri/common/SConscript index 0bee1b41fc6..b402736db69 100644 --- a/src/mesa/drivers/dri/common/SConscript +++ b/src/mesa/drivers/dri/common/SConscript @@ -37,7 +37,7 @@ drienv.PkgUseModules('DRM') # else #env.Append(CPPDEFINES = ['__NOT_HAVE_DRM_H']) -sources = drienv.ParseSourceList('Makefile.sources', 'DRI_COMMON_FILES') +sources = drienv.ParseSourceList('Makefile.sources', ['DRI_COMMON_FILES', 'XMLCONFIG_FILES' ]) dri_common = drienv.ConvenienceLibrary( target = 'dri_common', diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 235bcfeae02..566f2ddd98f 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -48,6 +48,7 @@ libi965_dri_la_LIBADD = $(INTEL_LIBS) TEST_LIBS = \ libi965_dri.la \ ../common/libdricommon.la \ + ../common/libxmlconfig.la \ ../common/libmegadriver_stub.la \ ../../../libmesa.la \ $(DRI_LIB_DEPS) \ From d15c06b514936fb927b174a716c24af8f5892542 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 10 Jun 2015 23:50:21 +0100 Subject: [PATCH 575/834] vc4: 
automake: enable subdir-objects Silence the warnings about the future incompatibility with automake 2.0 Cc: Eric Anholt Signed-off-by: Emil Velikov --- src/gallium/drivers/vc4/Makefile.am | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am index 774463138d0..3f62ce21a9f 100644 --- a/src/gallium/drivers/vc4/Makefile.am +++ b/src/gallium/drivers/vc4/Makefile.am @@ -19,6 +19,8 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. +AUTOMAKE_OPTIONS = subdir-objects + include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc From cfc175b40995ca4e590cd30897f6bb017e1376a3 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 10 Jun 2015 09:50:47 -0700 Subject: [PATCH 576/834] i965/fs: Fix unused variable warning Annotate offset_components with attribute 'unused'. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 0f1a2860496..3c24a87c10c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1599,7 +1599,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->is_array; - int lod_components = 0, offset_components = 0; + int lod_components = 0; + int UNUSED offset_components = 0; fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset; From 86b4acb409a2103d6a12f83de7ec04af6cc05fec Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 11 Jun 2015 09:44:54 -0700 Subject: [PATCH 577/834] i965/cs: Use exec all for CS terminate This prevents an assertion from being hit with SIMD16: Assertion `inst->exec_size == dispatch_width() || force_writemask_all' failed. 
Signed-off-by: Jordan Justen Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index c41284b0a5e..588966b66f1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1948,7 +1948,8 @@ fs_visitor::emit_cs_terminate() bld.exec_all().MOV(payload, g0); /* Send a message to the thread spawner to terminate the thread. */ - fs_inst *inst = bld.emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload); + fs_inst *inst = bld.exec_all() + .emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload); inst->eot = true; } From e7f628c2fc5ef42672e3281e224226c3d47b1bac Mon Sep 17 00:00:00 2001 From: Chris Forbes Date: Sun, 7 Sep 2014 19:24:15 +1200 Subject: [PATCH 578/834] glsl: Add ir node for barrier v2: * Changes suggested by mattst88 [jordan.l.justen@intel.com: Add nir support] Signed-off-by: Jordan Justen Reviewed-by: Ben Widawsky --- src/glsl/ir.h | 24 +++++++++++++++++++ src/glsl/ir_hierarchical_visitor.cpp | 9 +++++++ src/glsl/ir_hierarchical_visitor.h | 1 + src/glsl/ir_hv_accept.cpp | 6 +++++ src/glsl/ir_print_visitor.cpp | 7 +++++- src/glsl/ir_print_visitor.h | 1 + src/glsl/ir_visitor.h | 2 ++ src/glsl/nir/glsl_to_nir.cpp | 7 ++++++ src/mesa/drivers/dri/i965/brw_vec4.h | 1 + .../drivers/dri/i965/brw_vec4_visitor.cpp | 6 +++++ src/mesa/program/ir_to_mesa.cpp | 7 ++++++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 ++++++ 12 files changed, 77 insertions(+), 1 deletion(-) diff --git a/src/glsl/ir.h b/src/glsl/ir.h index fab1cd2d291..f9045553501 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -78,6 +78,7 @@ enum ir_node_type { ir_type_discard, ir_type_emit_vertex, ir_type_end_primitive, + ir_type_barrier, ir_type_max, /**< maximum ir_type enum number, for validation */ ir_type_unset = ir_type_max }; @@ -2396,6 +2397,29 @@ public: ir_rvalue *stream; 
}; +/** + * IR instruction for tessellation control and compute shader barrier. + */ +class ir_barrier : public ir_instruction { +public: + ir_barrier() + : ir_instruction(ir_type_barrier) + { + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_barrier *clone(void *mem_ctx, struct hash_table *) const + { + return new(mem_ctx) ir_barrier(); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); +}; + /*@}*/ /** diff --git a/src/glsl/ir_hierarchical_visitor.cpp b/src/glsl/ir_hierarchical_visitor.cpp index adb629414a2..1d23a776643 100644 --- a/src/glsl/ir_hierarchical_visitor.cpp +++ b/src/glsl/ir_hierarchical_visitor.cpp @@ -79,6 +79,15 @@ ir_hierarchical_visitor::visit(ir_dereference_variable *ir) return visit_continue; } +ir_visitor_status +ir_hierarchical_visitor::visit(ir_barrier *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + ir_visitor_status ir_hierarchical_visitor::visit_enter(ir_loop *ir) { diff --git a/src/glsl/ir_hierarchical_visitor.h b/src/glsl/ir_hierarchical_visitor.h index cac78698e73..28517b6e4f4 100644 --- a/src/glsl/ir_hierarchical_visitor.h +++ b/src/glsl/ir_hierarchical_visitor.h @@ -87,6 +87,7 @@ public: virtual ir_visitor_status visit(class ir_variable *); virtual ir_visitor_status visit(class ir_constant *); virtual ir_visitor_status visit(class ir_loop_jump *); + virtual ir_visitor_status visit(class ir_barrier *); /** * ir_dereference_variable isn't technically a leaf, but it is treated as a diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp index be5b3eaa00d..d3662cf5063 100644 --- a/src/glsl/ir_hv_accept.cpp +++ b/src/glsl/ir_hv_accept.cpp @@ -429,3 +429,9 @@ ir_end_primitive::accept(ir_hierarchical_visitor *v) return (s == visit_stop) ? 
s : v->visit_leave(this); } + +ir_visitor_status +ir_barrier::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp index 01f52e85f4a..f5de6ac065b 100644 --- a/src/glsl/ir_print_visitor.cpp +++ b/src/glsl/ir_print_visitor.cpp @@ -573,5 +573,10 @@ ir_print_visitor::visit(ir_end_primitive *ir) fprintf(f, "(end-primitive "); ir->stream->accept(this); fprintf(f, ")\n"); - +} + +void +ir_print_visitor::visit(ir_barrier *ir) +{ + fprintf(f, "(barrier)\n"); } diff --git a/src/glsl/ir_print_visitor.h b/src/glsl/ir_print_visitor.h index 98f041d1a7f..965e63ade8b 100644 --- a/src/glsl/ir_print_visitor.h +++ b/src/glsl/ir_print_visitor.h @@ -71,6 +71,7 @@ public: virtual void visit(ir_loop_jump *); virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); + virtual void visit(ir_barrier *); /*@}*/ private: diff --git a/src/glsl/ir_visitor.h b/src/glsl/ir_visitor.h index 40f96ffbca0..7c38481cd53 100644 --- a/src/glsl/ir_visitor.h +++ b/src/glsl/ir_visitor.h @@ -65,6 +65,7 @@ public: virtual void visit(class ir_loop_jump *) = 0; virtual void visit(class ir_emit_vertex *) = 0; virtual void visit(class ir_end_primitive *) = 0; + virtual void visit(class ir_barrier *) = 0; /*@}*/ }; @@ -85,6 +86,7 @@ public: virtual void visit(class ir_call *) {} virtual void visit(class ir_emit_vertex *) {} virtual void visit(class ir_end_primitive *) {} + virtual void visit(class ir_barrier *) {} }; #endif /* __cplusplus */ diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index ad8cfad627b..3c7284e8ca5 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -65,6 +65,7 @@ public: virtual void visit(ir_dereference_variable *); virtual void visit(ir_dereference_record *); virtual void visit(ir_dereference_array *); + virtual void visit(ir_barrier *); void create_function(ir_function *ir); @@ -1889,3 +1890,9 @@ 
nir_visitor::visit(ir_dereference_array *ir) ralloc_steal(this->deref_tail, deref); this->deref_tail = &deref->deref; } + +void +nir_visitor::visit(ir_barrier *ir) +{ + unreachable("Not implemented!"); +} diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 06a16a49b6f..4449e0a52ce 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -160,6 +160,7 @@ public: virtual void visit(ir_if *); virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); + virtual void visit(ir_barrier *); /*@}*/ src_reg result; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 242d007b355..0a76bde7726 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2966,6 +2966,12 @@ vec4_visitor::visit(ir_end_primitive *) unreachable("not reached"); } +void +vec4_visitor::visit(ir_barrier *) +{ + unreachable("not reached"); +} + void vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, dst_reg dst, src_reg offset, diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 514bb930e76..18e3bc5d5cc 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -262,6 +262,7 @@ public: virtual void visit(ir_if *); virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); + virtual void visit(ir_barrier *); /*@}*/ src_reg result; @@ -2117,6 +2118,12 @@ ir_to_mesa_visitor::visit(ir_end_primitive *) assert(!"Geometry shaders not supported."); } +void +ir_to_mesa_visitor::visit(ir_barrier *) +{ + unreachable("GLSL barrier() not supported."); +} + ir_to_mesa_visitor::ir_to_mesa_visitor() { result.file = PROGRAM_UNDEFINED; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 0e60d95c575..03834b69a23 100644 --- 
a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -395,6 +395,7 @@ public: virtual void visit(ir_if *); virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); + virtual void visit(ir_barrier *); /*@}*/ st_src_reg result; @@ -3417,6 +3418,12 @@ glsl_to_tgsi_visitor::visit(ir_end_primitive *ir) emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result); } +void +glsl_to_tgsi_visitor::visit(ir_barrier *ir) +{ + unreachable("Not implemented!"); +} + glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() { result.file = PROGRAM_UNDEFINED; From 86855365b4059c60a9e1dcc0b7713941a2507bd0 Mon Sep 17 00:00:00 2001 From: Chris Forbes Date: Sun, 7 Sep 2014 19:29:50 +1200 Subject: [PATCH 579/834] glsl: Add builtin barrier() function [jordan.l.justen@intel.com: Add CS support] Signed-off-by: Jordan Justen Reviewed-by: Ben Widawsky Reviewed-by: Kenneth Graunke --- src/glsl/builtin_functions.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 97055d85d58..efab2991993 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -410,6 +410,13 @@ fp64(const _mesa_glsl_parse_state *state) return state->has_double(); } +static bool +barrier_supported(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_COMPUTE; + /* TODO: || stage->state == MESA_SHADER_TESS_CTRL; */ +} + /** @} */ /******************************************************************************/ @@ -654,6 +661,7 @@ private: const glsl_type *stream_type); ir_function_signature *_EndStreamPrimitive(builtin_available_predicate avail, const glsl_type *stream_type); + B0(barrier) B2(textureQueryLod); B1(textureQueryLevels); @@ -1933,6 +1941,7 @@ builtin_builder::create_builtins() _EndStreamPrimitive(gs_streams, glsl_type::uint_type), _EndStreamPrimitive(gs_streams, glsl_type::int_type), NULL); + add_function("barrier", _barrier(), 
NULL); add_function("textureQueryLOD", _textureQueryLod(glsl_type::sampler1D_type, glsl_type::float_type), @@ -4295,6 +4304,15 @@ builtin_builder::_EndStreamPrimitive(builtin_available_predicate avail, return sig; } +ir_function_signature * +builtin_builder::_barrier() +{ + MAKE_SIG(glsl_type::void_type, barrier_supported, 0); + + body.emit(new(mem_ctx) ir_barrier()); + return sig; +} + ir_function_signature * builtin_builder::_textureQueryLod(const glsl_type *sampler_type, const glsl_type *coord_type) From 2867f2e8cd54e2cbb38140e2e0f5521973091ace Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 13 Mar 2015 12:03:15 -0700 Subject: [PATCH 580/834] nir: Add barrier intrinsic function Signed-off-by: Jordan Justen Reviewed-by: Chris Forbes Reviewed-by: Connor Abbott Reviewed-by: Ben Widawsky --- src/glsl/nir/glsl_to_nir.cpp | 4 +++- src/glsl/nir/nir_intrinsics.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 3c7284e8ca5..95531bbcd8f 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -1894,5 +1894,7 @@ nir_visitor::visit(ir_dereference_array *ir) void nir_visitor::visit(ir_barrier *ir) { - unreachable("Not implemented!"); + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier); + nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); } diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index b516830be95..bc6e6b8f498 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -67,6 +67,7 @@ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, */ #define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0) +BARRIER(barrier) BARRIER(discard) /* From 1b9cc257d4b805e86af4860bb356dbedf5e054c6 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 4 Nov 2014 17:48:44 -0800 Subject: [PATCH 581/834] i965: Add GATEWAY_SFID definitions Signed-off-by: 
Jordan Justen Reviewed-by: Chris Forbes Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_defines.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index f6da305254c..eb04cc9d471 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1596,6 +1596,14 @@ enum brw_message_target { #define BRW_SCRATCH_SPACE_SIZE_1M 10 #define BRW_SCRATCH_SPACE_SIZE_2M 11 +#define BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY 0 +#define BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY 1 +#define BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG 2 +#define BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP 3 +#define BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG 4 +#define BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE 5 +#define BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE 6 + #define CMD_URB_FENCE 0x6000 #define CMD_CS_URB_STATE 0x6001 From 69659546a6a352239c5989624f9d9f084c643d7d Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 4 Nov 2014 18:07:52 -0800 Subject: [PATCH 582/834] i965/inst: Add gateway_notify and gateway_subfuncid fields These fields will be used when emitting a send for the barrier function. 
Reference: IVB PRM Volume 4, Part 2, Section 1.1.1 Message Descriptor v2: * notify => gateway_notify (Ken) * define bits for gen4-gen6 (bwidawsk, Ken) Signed-off-by: Jordan Justen Reviewed-by: Chris Forbes Reviewed-by: Ben Widawsky Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_inst.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h index e347c518348..7a8c210118c 100644 --- a/src/mesa/drivers/dri/i965/brw_inst.h +++ b/src/mesa/drivers/dri/i965/brw_inst.h @@ -322,6 +322,9 @@ FJ(gen4_jump_count, 111, 96, devinfo->gen < 6) FC(gen4_pop_count, 115, 112, devinfo->gen < 6) /** @} */ +/* Message descriptor bits */ +#define MD(x) ((x) + 96) + /** * Fields for SEND messages: * @{ @@ -347,6 +350,7 @@ FF(header_present, /* 6: */ 115, 115, /* 7: */ 115, 115, /* 8: */ 115, 115) +F(gateway_notify, MD(16), MD(15)) FF(function_control, /* 4: */ 111, 96, /* 4.5: */ 111, 96, @@ -354,6 +358,13 @@ FF(function_control, /* 6: */ 114, 96, /* 7: */ 114, 96, /* 8: */ 114, 96) +FF(gateway_subfuncid, + /* 4: */ MD(1), MD(0), + /* 4.5: */ MD(1), MD(0), + /* 5: */ MD(1), MD(0), /* 2:0, but bit 2 is reserved MBZ */ + /* 6: */ MD(2), MD(0), + /* 7: */ MD(2), MD(0), + /* 8: */ MD(2), MD(0)) FF(sfid, /* 4: */ 123, 120, /* called msg_target */ /* 4.5 */ 123, 120, @@ -364,9 +375,6 @@ FF(sfid, FC(base_mrf, 27, 24, devinfo->gen < 6); /** @} */ -/* Message descriptor bits */ -#define MD(x) (x + 96) - /** * URB message function control bits: * @{ From bdbbec33cf23193e1c81e0ecf28f2cc793d507bf Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 4 Nov 2014 17:51:19 -0800 Subject: [PATCH 583/834] i965: Disassemble Gateway SEND messages Signed-off-by: Jordan Justen Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_disasm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c 
index ef9e7ccda79..1075c5acba5 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -402,6 +402,16 @@ static const char *const gen6_sfid[16] = { [HSW_SFID_CRE] = "cre", }; +static const char *const gen7_gateway_subfuncid[8] = { + [BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY] = "open", + [BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY] = "close", + [BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG] = "forward msg", + [BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP] = "get timestamp", + [BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG] = "barrier msg", + [BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE] = "update state", + [BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE] = "mmio read/write", +}; + static const char *const dp_write_port_msg_type[8] = { [0b000] = "OWord block write", [0b001] = "OWord dual block write", @@ -1498,6 +1508,12 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo, break; case BRW_SFID_THREAD_SPAWNER: break; + + case BRW_SFID_MESSAGE_GATEWAY: + format(file, " (%s)", + gen7_gateway_subfuncid[brw_inst_gateway_subfuncid(devinfo, inst)]); + break; + case GEN7_SFID_DATAPORT_DATA_CACHE: if (devinfo->gen >= 7) { format(file, " ("); From b925f1a1df86120d2846bf09797bb0967040f9c6 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 4 Nov 2014 17:52:42 -0800 Subject: [PATCH 584/834] i965: Add notification register This will be used by the wait instruction when implementing the barrier() function. v2: * Changes suggested by mattst88 Signed-off-by: Jordan Justen Reviewed-by: Chris Forbes Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_reg.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 81a932063a7..c8b134103bb 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -764,6 +764,22 @@ brw_ip_reg(void) WRITEMASK_XYZW); /* NOTE! 
*/ } +static inline struct brw_reg +brw_notification_reg(void) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NOTIFICATION_COUNT, + 0, + 0, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + static inline struct brw_reg brw_acc_reg(unsigned width) { From 0d250cc210f971f566bbe5b1e54cf3cd114537e9 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 4 Nov 2014 18:05:04 -0800 Subject: [PATCH 585/834] i965: Add brw_WAIT to emit wait instruction This will be used to implement the barrier function. v2: * Rename to brw_WAIT (mattst88) Signed-off-by: Jordan Justen Reviewed-by: Chris Forbes Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_eu.h | 2 ++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 0e7be1e1ea0..0eaab2f1d1f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -390,6 +390,8 @@ brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index, void brw_NOP(struct brw_codegen *p); +void brw_WAIT(struct brw_codegen *p); + /* Special case: there is never a destination, execution size will be * taken from src0: */ diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index a1d11f30433..61fc1abf897 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -3406,3 +3406,24 @@ void brw_shader_time_add(struct brw_codegen *p, brw_pop_insn_state(p); } + + +/** + * Emit the wait instruction for a barrier + */ +void +brw_WAIT(struct brw_codegen *p) +{ + const struct brw_device_info *devinfo = p->devinfo; + struct brw_inst *insn; + + struct brw_reg src = brw_notification_reg(); + + insn = next_insn(p, BRW_OPCODE_WAIT); + brw_set_dest(p, insn, src); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, 
brw_null_reg()); + + brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); + brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); +} From 7953c000731ec1310fdbb5d8a13720fe0cdbf6f4 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 4 Nov 2014 18:11:37 -0800 Subject: [PATCH 586/834] i965: Add brw_barrier to emit a Gateway Barrier SEND This will be used to implement the Gateway Barrier SEND needed to implement the barrier function. v2: * notify => gateway_notify (Ken) * combine short lines of brw_barrier proto/decl (mattst88) Signed-off-by: Jordan Justen Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_eu.h | 2 ++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 30 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 0eaab2f1d1f..761aa0ec5fa 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -361,6 +361,8 @@ brw_jump_scale(const struct brw_device_info *devinfo) return 1; } +void brw_barrier(struct brw_codegen *p, struct brw_reg src); + /* If/else/endif. Works by manipulating the execution flags on each * channel. 
*/ diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 61fc1abf897..0f536046f6f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -3408,6 +3408,36 @@ void brw_shader_time_add(struct brw_codegen *p, } +/** + * Emit the SEND message for a barrier + */ +void +brw_barrier(struct brw_codegen *p, struct brw_reg src) +{ + const struct brw_device_info *devinfo = p->devinfo; + struct brw_inst *inst; + + assert(devinfo->gen >= 7); + + inst = next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, inst, brw_null_reg()); + brw_set_src0(p, inst, src); + brw_set_src1(p, inst, brw_null_reg()); + + brw_set_message_descriptor(p, inst, BRW_SFID_MESSAGE_GATEWAY, + 1 /* msg_length */, + 0 /* response_length */, + false /* header_present */, + false /* end_of_thread */); + + brw_inst_set_gateway_notify(devinfo, inst, 1); + brw_inst_set_gateway_subfuncid(devinfo, inst, + BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG); + + brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE); +} + + /** * Emit the wait instruction for a barrier */ From f7ef8ec9d8f56b77029534952628c3204c4d5f63 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 27 Aug 2014 11:32:08 -0700 Subject: [PATCH 587/834] i965/fs: Implement support for ir_barrier Signed-off-by: Jordan Justen Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_defines.h | 5 ++++ src/mesa/drivers/dri/i965/brw_fs.h | 3 +++ .../drivers/dri/i965/brw_fs_generator.cpp | 11 +++++++++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 23 +++++++++++++++++++ src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +++ 5 files changed, 45 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index eb04cc9d471..2a8fc0beea4 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1135,6 +1135,11 @@ enum opcode { * Terminate the compute shader. 
*/ CS_OPCODE_CS_TERMINATE, + + /** + * GLSL barrier() + */ + SHADER_OPCODE_BARRIER, }; enum brw_urb_write_flags { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index ca887ec0b37..cdeea6d9988 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -273,6 +273,8 @@ public: void emit_urb_writes(); void emit_cs_terminate(); + void emit_barrier(); + void emit_shader_time_begin(); void emit_shader_time_end(); void SHADER_TIME_ADD(const brw::fs_builder &bld, @@ -418,6 +420,7 @@ private: void generate_fb_write(fs_inst *inst, struct brw_reg payload); void generate_urb_write(fs_inst *inst, struct brw_reg payload); void generate_cs_terminate(fs_inst *inst, struct brw_reg payload); + void generate_barrier(fs_inst *inst, struct brw_reg src); void generate_blorp_fb_write(fs_inst *inst); void generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index ff05b2a35ab..8eb3ace5c0a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -400,6 +400,13 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload) brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); } +void +fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src) +{ + brw_barrier(p, src); + brw_WAIT(p); +} + void fs_generator::generate_blorp_fb_write(fs_inst *inst) { @@ -2117,6 +2124,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_cs_terminate(inst, src[0]); break; + case SHADER_OPCODE_BARRIER: + generate_barrier(inst, src[0]); + break; + default: unreachable("Unsupported opcode"); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 588966b66f1..4770838b26f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1953,6 +1953,29 @@ fs_visitor::emit_cs_terminate() inst->eot = true; } +void +fs_visitor::emit_barrier() +{ + assert(brw->gen >= 7); + + /* We are getting the barrier ID from the compute shader header */ + assert(stage == MESA_SHADER_COMPUTE); + + fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); + + /* Clear the message payload */ + fs_inst *inst = bld.exec_all().MOV(payload, fs_reg(0u)); + + /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */ + fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)); + inst = bld.exec_all().AND(component(payload, 2), r0_2, fs_reg(0x0f000000u)); + + /* Emit a gateway "barrier" message using the payload we set up, followed + * by a wait instruction. + */ + bld.exec_all().emit(SHADER_OPCODE_BARRIER, reg_undef, payload); +} + fs_visitor::fs_visitor(struct brw_context *brw, void *mem_ctx, gl_shader_stage stage, diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 76285f273e4..545ec2679ae 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -631,6 +631,8 @@ brw_instruction_name(enum opcode op) return "gs_ff_sync_set_primitives"; case CS_OPCODE_CS_TERMINATE: return "cs_terminate"; + case SHADER_OPCODE_BARRIER: + return "barrier"; } unreachable("not reached"); @@ -1058,6 +1060,7 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_URB_WRITE_SIMD8: case FS_OPCODE_FB_WRITE: + case SHADER_OPCODE_BARRIER: return true; default: return false; From f0e772392f1c61df6e3f253dc236eb9737fb6146 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 13 Mar 2015 12:03:52 -0700 Subject: [PATCH 588/834] i965/nir: Support barrier intrinsic function Signed-off-by: Jordan Justen Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 3c24a87c10c..59081eab877 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1575,6 +1575,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case nir_intrinsic_barrier: + emit_barrier(); + break; + default: unreachable("unknown intrinsic"); } From 3d78bdea3155ff3f19a782e0eb3a55612bfd8dd0 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Wed, 10 Jun 2015 18:46:22 +1000 Subject: [PATCH 589/834] glsl: enforce output variable rules for GLSL ES 3.10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some rules are already applied; this just adds the missing ones. Reviewed-by: Samuel Iglesias Gonsálvez --- src/glsl/ast_to_hir.cpp | 49 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index aab0c290c07..ec25e19dee6 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -3653,6 +3653,55 @@ ast_declarator_list::hir(exec_list *instructions, "type %s", check_type->name); } } + + /* From section 4.3.6 (Output Variables) of the GLSL ES 3.10 spec: + * + * It is a compile-time error to declare a vertex shader output + * with, or that contains, any of the following types: + * + * * A boolean type + * * An opaque type + * * An array of arrays + * * An array of structures + * * A structure containing an array + * * A structure containing a structure + * + * It is a compile-time error to declare a fragment shader output + * with, or that contains, any of the following types: + * + * * A boolean type + * * An opaque type + * * A matrix + * * A structure + * * An array of array + */ + if (state->es_shader) { + if (var->type->is_array() && + var->type->fields.array->is_array()) { + _mesa_glsl_error(&loc, state, + "%s shader output " + "cannot have an array 
of arrays", + _mesa_shader_stage_to_string(state->stage)); + } + if (state->stage == MESA_SHADER_VERTEX) { + if (var->type->is_array() && + var->type->fields.array->is_record()) { + _mesa_glsl_error(&loc, state, + "vertex shader output " + "cannot have an array of structs"); + } + if (var->type->is_record()) { + for (unsigned i = 0; i < var->type->length; i++) { + if (var->type->fields.structure[i].type->is_array() || + var->type->fields.structure[i].type->is_record()) + _mesa_glsl_error(&loc, state, + "vertex shader output cannot have a " + "struct that contains an " + "array or struct"); + } + } + } + } } /* Integer fragment inputs must be qualified with 'flat'. In GLSL ES, From 94d669b0d2f56d58a494a40f9e8acdef01306496 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 12 Jun 2015 16:03:56 +1000 Subject: [PATCH 590/834] glsl: enforce fragment shader input restrictions in GLSL ES 3.10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Samuel Iglesias Gonsálvez --- src/glsl/ast_to_hir.cpp | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index ec25e19dee6..1c2c7b56efa 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -3607,6 +3607,51 @@ ast_declarator_list::hir(exec_list *instructions, } handle_geometry_shader_input_decl(state, loc, var); + } else if (state->stage == MESA_SHADER_FRAGMENT) { + /* From section 4.3.4 (Input Variables) of the GLSL ES 3.10 spec: + * + * It is a compile-time error to declare a fragment shader + * input with, or that contains, any of the following types: + * + * * A boolean type + * * An opaque type + * * An array of arrays + * * An array of structures + * * A structure containing an array + * * A structure containing a structure + */ + if (state->es_shader) { + const glsl_type *check_type = var->type->without_array(); + if (check_type->is_boolean() || + 
check_type->contains_opaque()) { + _mesa_glsl_error(&loc, state, + "fragment shader input cannot have type %s", + check_type->name); + } + if (var->type->is_array() && + var->type->fields.array->is_array()) { + _mesa_glsl_error(&loc, state, + "%s shader output " + "cannot have an array of arrays", + _mesa_shader_stage_to_string(state->stage)); + } + if (var->type->is_array() && + var->type->fields.array->is_record()) { + _mesa_glsl_error(&loc, state, + "fragment shader input " + "cannot have an array of structs"); + } + if (var->type->is_record()) { + for (unsigned i = 0; i < var->type->length; i++) { + if (var->type->fields.structure[i].type->is_array() || + var->type->fields.structure[i].type->is_record()) + _mesa_glsl_error(&loc, state, + "fragement shader input cannot have " + "a struct that contains an " + "array or struct"); + } + } + } } } else if (var->data.mode == ir_var_shader_out) { const glsl_type *check_type = var->type->without_array(); From 0d2068a92d74f421960947e589cf56a2b125035f Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Wed, 10 Jun 2015 19:40:07 +1000 Subject: [PATCH 591/834] glsl: enforce restriction on AoA interface blocks in GLSL ES 3.10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Samuel Iglesias Gonsálvez --- src/glsl/ast_to_hir.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 1c2c7b56efa..a7811af5d0f 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -5847,6 +5847,17 @@ ast_interface_block::hir(exec_list *instructions, const glsl_type *block_array_type = process_array_type(&loc, block_type, this->array_specifier, state); + /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec: + * + * * Arrays of arrays of blocks are not allowed + */ + if (state->es_shader && block_array_type->is_array() && + block_array_type->fields.array->is_array()) { + _mesa_glsl_error(&loc, state, + 
"arrays of arrays interface blocks are " "not allowed"); } + var = new(state) ir_variable(block_array_type, this->instance_name, var_mode); From b5c5aac687ca4e203695790e334c1f7f3315c240 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 21 May 2015 16:04:43 -0700 Subject: [PATCH 592/834] i965: Consolidate certain miptree params to flags I think pretty much everyone agrees that having more than a single bool as a function argument is bordering on a bad idea. What sucks about the current code is in several instances it's necessary to propagate these boolean selections down to lower layers of the code. This requires plumbing (mechanical, but still churn) pretty much all of the miptree functions each time. By introducing the flags parameter, it is possible to add miptree constraints very easily. The use of this, as is already the case, is sometimes we have some information at the time we create the miptree that needs to be known all the way at the lowest levels of the create/allocation, disable_aux_buffers is currently one such example. There will be another example coming up in a few patches. v2: Tab fix. (Ben) Long line fixes (Topi) Use anonymous enum instead of #define for layout flags (Chad) Use 'X != 0' instead of !!X (everyone except Chad) v3: Some non-trivial conflict resolution on top of Anuj's patches. 
Signed-off-by: Ben Widawsky Cc: "Pohjolainen, Topi" Reviewed-by: Chad Versace Reviewed-by: Anuj Phogat Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 6 +- src/mesa/drivers/dri/i965/intel_fbo.c | 5 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 96 ++++++++++--------- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 16 +++- src/mesa/drivers/dri/i965/intel_pixel_draw.c | 2 +- src/mesa/drivers/dri/i965/intel_tex.c | 8 +- src/mesa/drivers/dri/i965/intel_tex.h | 2 +- src/mesa/drivers/dri/i965/intel_tex_image.c | 14 ++- .../drivers/dri/i965/intel_tex_validate.c | 3 +- 9 files changed, 77 insertions(+), 75 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 312a8873afe..52f37a19f68 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -763,9 +763,9 @@ intel_miptree_set_total_width_height(struct brw_context *brw, void brw_miptree_layout(struct brw_context *brw, - bool for_bo, + struct intel_mipmap_tree *mt, enum intel_miptree_tiling_mode requested, - struct intel_mipmap_tree *mt) + uint32_t layout_flags) { bool gen6_hiz_or_stencil = false; @@ -821,7 +821,7 @@ brw_miptree_layout(struct brw_context *brw, mt->align_h /= j; } - if (!for_bo) + if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0) mt->tiling = brw_miptree_choose_tiling(brw, requested, mt); } diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index aebed723f75..1b3a72f3ec2 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, image->height, 1, image->pitch, - true /*disable_aux_buffers*/); + MIPTREE_LAYOUT_DISABLE_AUX); if (!irb->mt) return; @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, intel_image->base.Base.Level, intel_image->base.Base.Level, width, height, depth, - 
true, irb->mt->num_samples, INTEL_MIPTREE_TILING_ANY, - false); + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); if (intel_miptree_wants_hiz_buffer(brw, new_mt)) { intel_miptree_alloc_hiz(brw, new_mt); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 615cbfb7158..fb4e5b8b775 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -257,11 +257,9 @@ intel_miptree_create_layout(struct brw_context *brw, GLuint width0, GLuint height0, GLuint depth0, - bool for_bo, GLuint num_samples, enum intel_miptree_tiling_mode requested, - bool force_all_slices_at_each_lod, - bool disable_aux_buffers) + uint32_t layout_flags) { struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); if (!mt) @@ -300,7 +298,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt->logical_height0 = height0; mt->logical_depth0 = depth0; mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; - mt->disable_aux_buffers = disable_aux_buffers; + mt->disable_aux_buffers = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0; exec_list_make_empty(&mt->hiz_map); /* The cpp is bytes per (1, blockheight)-sized block for compressed @@ -436,12 +434,15 @@ intel_miptree_create_layout(struct brw_context *brw, mt->physical_height0 = height0; mt->physical_depth0 = depth0; - if (!for_bo && + if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) && _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL && (brw->must_use_separate_stencil || (brw->has_separate_stencil && intel_miptree_wants_hiz_buffer(brw, mt)))) { - const bool force_all_slices_at_each_lod = brw->gen == 6; + uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; + if (brw->gen == 6) + stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD; + mt->stencil_mt = intel_miptree_create(brw, mt->target, MESA_FORMAT_S_UINT8, @@ -450,10 +451,10 @@ intel_miptree_create_layout(struct brw_context *brw, mt->logical_width0, mt->logical_height0, mt->logical_depth0, - 
true, num_samples, INTEL_MIPTREE_TILING_ANY, - force_all_slices_at_each_lod); + stencil_flags); + if (!mt->stencil_mt) { intel_miptree_release(&mt); return NULL; @@ -471,10 +472,10 @@ intel_miptree_create_layout(struct brw_context *brw, } } - if (force_all_slices_at_each_lod) + if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD) mt->array_layout = ALL_SLICES_AT_EACH_LOD; - brw_miptree_layout(brw, for_bo, requested, mt); + brw_miptree_layout(brw, mt, requested, layout_flags); if (mt->disable_aux_buffers) assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS); @@ -532,30 +533,33 @@ intel_miptree_create(struct brw_context *brw, GLuint width0, GLuint height0, GLuint depth0, - bool expect_accelerated_upload, GLuint num_samples, enum intel_miptree_tiling_mode requested_tiling, - bool force_all_slices_at_each_lod) + uint32_t layout_flags) { struct intel_mipmap_tree *mt; mesa_format tex_format = format; mesa_format etc_format = MESA_FORMAT_NONE; GLuint total_width, total_height; + uint32_t alloc_flags = 0; format = intel_lower_compressed_format(brw, format); etc_format = (format != tex_format) ? 
tex_format : MESA_FORMAT_NONE; + assert((layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) == 0); + assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0); mt = intel_miptree_create_layout(brw, target, format, - first_level, last_level, width0, - height0, depth0, - false, num_samples, - requested_tiling, - force_all_slices_at_each_lod, - false /*disable_aux_buffers*/); - - if (!mt) + first_level, last_level, width0, + height0, depth0, num_samples, + requested_tiling, layout_flags); + /* + * pitch == 0 || height == 0 indicates the null texture + */ + if (!mt || !mt->total_width || !mt->total_height) { + intel_miptree_release(&mt); return NULL; + } total_width = mt->total_width; total_height = mt->total_height; @@ -573,13 +577,14 @@ intel_miptree_create(struct brw_context *brw, mt->tiling = I915_TILING_Y; } + if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) + alloc_flags |= BO_ALLOC_FOR_RENDER; + unsigned long pitch; + mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", total_width, + total_height, mt->cpp, &mt->tiling, + &pitch, alloc_flags); mt->etc_format = etc_format; - mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", - total_width, total_height, mt->cpp, - &mt->tiling, &pitch, - (expect_accelerated_upload ? - BO_ALLOC_FOR_RENDER : 0)); mt->pitch = pitch; /* If the BO is too large to fit in the aperture, we need to use the @@ -593,10 +598,8 @@ intel_miptree_create(struct brw_context *brw, mt->tiling = I915_TILING_X; drm_intel_bo_unreference(mt->bo); mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", - total_width, total_height, mt->cpp, - &mt->tiling, &pitch, - (expect_accelerated_upload ? 
- BO_ALLOC_FOR_RENDER : 0)); + total_width, total_height, mt->cpp, + &mt->tiling, &pitch, alloc_flags); mt->pitch = pitch; } @@ -635,7 +638,7 @@ intel_miptree_create_for_bo(struct brw_context *brw, uint32_t height, uint32_t depth, int pitch, - bool disable_aux_buffers) + uint32_t layout_flags) { struct intel_mipmap_tree *mt; uint32_t tiling, swizzle; @@ -662,13 +665,12 @@ intel_miptree_create_for_bo(struct brw_context *brw, * just a place holder and will not make any change to the miptree * tiling format. */ + layout_flags |= MIPTREE_LAYOUT_FOR_BO; mt = intel_miptree_create_layout(brw, target, format, 0, 0, - width, height, depth, - true, 0, + width, height, depth, 0, INTEL_MIPTREE_TILING_ANY, - false, - disable_aux_buffers); + layout_flags); if (!mt) return NULL; @@ -718,7 +720,7 @@ intel_update_winsys_renderbuffer_miptree(struct brw_context *intel, height, 1, pitch, - false); + 0); if (!singlesample_mt) goto fail; @@ -776,8 +778,9 @@ intel_miptree_create_for_renderbuffer(struct brw_context *brw, GLenum target = num_samples > 1 ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; mt = intel_miptree_create(brw, target, format, 0, 0, - width, height, depth, true, num_samples, - INTEL_MIPTREE_TILING_ANY, false); + width, height, depth, num_samples, + INTEL_MIPTREE_TILING_ANY, + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); if (!mt) goto fail; @@ -1288,10 +1291,9 @@ intel_miptree_alloc_mcs(struct brw_context *brw, mt->logical_width0, mt->logical_height0, mt->logical_depth0, - true, 0 /* num_samples */, INTEL_MIPTREE_TILING_Y, - false); + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); /* From the Ivy Bridge PRM, Vol 2 Part 1 p326: * @@ -1347,10 +1349,9 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, mcs_width, mcs_height, mt->logical_depth0, - true, 0 /* num_samples */, INTEL_MIPTREE_TILING_Y, - false); + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); return mt->mcs_mt; } @@ -1592,7 +1593,10 @@ intel_hiz_miptree_buf_create(struct brw_context *brw, struct intel_mipmap_tree *mt) { struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1); - const bool force_all_slices_at_each_lod = brw->gen == 6; + uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; + + if (brw->gen == 6) + layout_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD; if (!buf) return NULL; @@ -1605,10 +1609,9 @@ intel_hiz_miptree_buf_create(struct brw_context *brw, mt->logical_width0, mt->logical_height0, mt->logical_depth0, - true, mt->num_samples, INTEL_MIPTREE_TILING_ANY, - force_all_slices_at_each_lod); + layout_flags); if (!buf->mt) { free(buf); return NULL; @@ -2038,9 +2041,8 @@ intel_miptree_map_blit(struct brw_context *brw, map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format, 0, 0, map->w, map->h, 1, - false, 0, - INTEL_MIPTREE_TILING_NONE, - false); + 0, INTEL_MIPTREE_TILING_NONE, 0); + if (!map->mt) { fprintf(stderr, "Failed to allocate blit temporary\n"); goto fail; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 20bed5378ca..30a4595441c 100644 --- 
a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -535,6 +535,13 @@ bool intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt); +enum { + MIPTREE_LAYOUT_ACCELERATED_UPLOAD = 1 << 0, + MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD = 1 << 1, + MIPTREE_LAYOUT_FOR_BO = 1 << 2, + MIPTREE_LAYOUT_DISABLE_AUX = 1 << 3, +}; + struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw, GLenum target, mesa_format format, @@ -543,10 +550,9 @@ struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw, GLuint width0, GLuint height0, GLuint depth0, - bool expect_accelerated_upload, GLuint num_samples, enum intel_miptree_tiling_mode, - bool force_all_slices_at_each_lod); + uint32_t flags); struct intel_mipmap_tree * intel_miptree_create_for_bo(struct brw_context *brw, @@ -557,7 +563,7 @@ intel_miptree_create_for_bo(struct brw_context *brw, uint32_t height, uint32_t depth, int pitch, - bool disable_aux_buffers); + uint32_t layout_flags); void intel_update_winsys_renderbuffer_miptree(struct brw_context *intel, @@ -763,9 +769,9 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw, void brw_miptree_layout(struct brw_context *brw, - bool for_bo, + struct intel_mipmap_tree *mt, enum intel_miptree_tiling_mode requested, - struct intel_mipmap_tree *mt); + uint32_t layout_flags); void *intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt); diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c index 189a592d8d2..6c6bd8629ac 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c @@ -121,7 +121,7 @@ do_blit_drawpixels(struct gl_context * ctx, src_offset, width, height, 1, src_stride, - false /*disable_aux_buffers*/); + 0); if (!pbo_mt) return false; diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index 
777a682ad21..b0181ad1d75 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -93,7 +93,7 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx, } else { intel_image->mt = intel_miptree_create_for_teximage(brw, intel_texobj, intel_image, - false); + 0); /* Even if the object currently has a mipmap tree associated * with it, this one is a more likely candidate to represent the @@ -144,10 +144,8 @@ intel_alloc_texture_storage(struct gl_context *ctx, first_image->TexFormat, 0, levels - 1, width, height, depth, - false, /* expect_accelerated */ num_samples, - INTEL_MIPTREE_TILING_ANY, - false); + INTEL_MIPTREE_TILING_ANY, 0); if (intel_texobj->mt == NULL) { return false; @@ -341,7 +339,7 @@ intel_set_texture_storage_for_buffer_object(struct gl_context *ctx, buffer_offset, image->Width, image->Height, image->Depth, row_stride, - false /*disable_aux_buffers*/); + 0); if (!intel_texobj->mt) return false; diff --git a/src/mesa/drivers/dri/i965/intel_tex.h b/src/mesa/drivers/dri/i965/intel_tex.h index f048e846d55..402a3891ecd 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.h +++ b/src/mesa/drivers/dri/i965/intel_tex.h @@ -53,7 +53,7 @@ struct intel_mipmap_tree * intel_miptree_create_for_teximage(struct brw_context *brw, struct intel_texture_object *intelObj, struct intel_texture_image *intelImage, - bool expect_accelerated_upload); + uint32_t layout_flags); GLuint intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit); diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 85d3d04ecb3..ebe84b664d4 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -36,7 +36,7 @@ struct intel_mipmap_tree * intel_miptree_create_for_teximage(struct brw_context *brw, struct intel_texture_object *intelObj, struct intel_texture_image *intelImage, - bool expect_accelerated_upload) + uint32_t layout_flags) { GLuint lastLevel; 
int width, height, depth; @@ -79,10 +79,9 @@ intel_miptree_create_for_teximage(struct brw_context *brw, width, height, depth, - expect_accelerated_upload, intelImage->base.Base.NumSamples, INTEL_MIPTREE_TILING_ANY, - false); + layout_flags); } static void @@ -155,7 +154,7 @@ intel_set_texture_image_bo(struct gl_context *ctx, GLuint width, GLuint height, GLuint pitch, GLuint tile_x, GLuint tile_y, - bool disable_aux_buffers) + uint32_t layout_flags) { struct brw_context *brw = brw_context(ctx); struct intel_texture_image *intel_image = intel_texture_image(image); @@ -171,7 +170,7 @@ intel_set_texture_image_bo(struct gl_context *ctx, intel_image->mt = intel_miptree_create_for_bo(brw, bo, image->TexFormat, 0, width, height, 1, pitch, - disable_aux_buffers); + layout_flags); if (intel_image->mt == NULL) return; intel_image->mt->target = target; @@ -255,8 +254,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, rb->Base.Base.Width, rb->Base.Base.Height, rb->mt->pitch, - 0, 0, - false /*disable_aux_buffers*/); + 0, 0, 0); _mesa_unlock_texture(&brw->ctx, texObj); } @@ -349,7 +347,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, image->width, image->height, image->pitch, image->tile_x, image->tile_y, - true /*disable_aux_buffers*/); + MIPTREE_LAYOUT_DISABLE_AUX); } /** diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c index c581e145f64..4991c2997ef 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -144,10 +144,9 @@ intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit) width, height, depth, - true, 0 /* num_samples */, INTEL_MIPTREE_TILING_ANY, - false); + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); if (!intelObj->mt) return false; } From b91a110d5ce946abe0ee84625498676ac4bcf7aa Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 22 May 2015 18:13:24 -0700 Subject: [PATCH 593/834] i965/gen9: Only allow Y-Tiled 
MCS buffers For GEN9, much of the logic to use X-Tiled buffers has been stripped out. It is still supported in some places, but it's never desirable. Unfortunately we don't yet have the ability to have Y-Tiled scanout (see: http://patchwork.freedesktop.org/patch/46984/). NOTE: This patch shouldn't actually do anything since SKL doesn't yet use fast clears (they are disabled because they are causing regressions). Therefore, the only case we can get to this function on SKL is by way of intel_update_winsys_renderbuffer_miptree. v2: Update commit message to be more clear that the NOTE is for SKL only. Signed-off-by: Ben Widawsky Reviewed-by: Chad Versace Reviewed-by: Jordan Justen Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index fb4e5b8b775..cbb91dcd565 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -193,6 +193,8 @@ intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw, return false; } + if (brw->gen >= 9 && mt->tiling != I915_TILING_Y) + return false; if (mt->tiling != I915_TILING_X && mt->tiling != I915_TILING_Y) return false; From e92fbdcf9cf69e6b135c17c2851d50e256da8c29 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 21 May 2015 22:47:37 -0700 Subject: [PATCH 594/834] i965: Extract tiling from fast clear decision There are several constraints when determining if one can fast clear a surface. Some of these are alignment, pixel density, tiling formats, and others that vary by generation. The helper function which exists today does a suitable job, however it conflates "BO properties" with "Miptree properties" when using tiling. 
I consider the former to be attributes of the physical surface, things which are determined through BO allocation, and the latter being attributes which are derived from the API, and having nothing to do with the underlying surface. Determining tiling properties and creating miptrees are related operations (when we allocate a BO for a miptree) with some disjoint constraints. By extracting the decisions into two distinct choices (tiling vs. miptree properties), we gain flexibility throughout the code to make determinations about when we can or cannot fast clear strictly on the miptree. To signify this change, I've also renamed the function to indicate it is a distinction made on the miptree. I am torn as to whether or not it was a good idea to remove "non_msrt" since it's a really nice thing for grep. v2: Reword some comments (Chad) intel_is_non_msrt_mcs_tile_supported->intel_tiling_supports_non_msrt_mcs (Chad) Make full if ladder for gens in above function (Chad) Signed-off-by: Ben Widawsky Cc: Topi Pohjolainen Reviewed-by: Jordan Justen Reviewed-by: Anuj Phogat Reviewed-by: Chad Versace --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 36 +++++++++++++------ src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 10 +++--- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index cbb91dcd565..2d8ace1476c 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -158,15 +158,32 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw, } } +bool +intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) +{ + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render + * Target(s)", beneath the "Fast Color Clear" bullet (p326): + * + * - Support is limited to tiled render targets. + * + * Gen9 changes the restriction to Y-tile only. 
+ */ + if (brw->gen >= 9) + return tiling == I915_TILING_Y; + else if (brw->gen >= 7) + return tiling != I915_TILING_NONE; + else + return false; +} /** * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer - * can be used. + * can be used. This doesn't (and should not) inspect any of the properties of + * the miptree's BO. * * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)", * beneath the "Fast Color Clear" bullet (p326): * - * - Support is limited to tiled render targets. * - Support is for non-mip-mapped and non-array surface types only. * * And then later, on p327: @@ -175,8 +192,8 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw, * 64bpp, and 128bpp. */ bool -intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw, - struct intel_mipmap_tree *mt) +intel_miptree_is_fast_clear_capable(struct brw_context *brw, + struct intel_mipmap_tree *mt) { /* MCS support does not exist prior to Gen7 */ if (brw->gen < 7) @@ -193,11 +210,6 @@ intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw, return false; } - if (brw->gen >= 9 && mt->tiling != I915_TILING_Y) - return false; - if (mt->tiling != I915_TILING_X && - mt->tiling != I915_TILING_Y) - return false; if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16) return false; if (mt->first_level != 0 || mt->last_level != 0) { @@ -625,7 +637,8 @@ intel_miptree_create(struct brw_context *brw, * Allocation of the MCS miptree will be deferred until the first fast * clear actually occurs. */ - if (intel_is_non_msrt_mcs_buffer_supported(brw, mt)) + if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) && + intel_miptree_is_fast_clear_capable(brw, mt)) mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; return mt; @@ -731,7 +744,8 @@ intel_update_winsys_renderbuffer_miptree(struct brw_context *intel, * Allocation of the MCS miptree will be deferred until the first fast * clear actually occurs. 
*/ - if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt)) + if (intel_tiling_supports_non_msrt_mcs(intel, singlesample_mt->tiling) && + intel_miptree_is_fast_clear_capable(intel, singlesample_mt)) singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; if (num_samples == 0) { diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 30a4595441c..64fa191616c 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -522,15 +522,15 @@ enum intel_miptree_tiling_mode { INTEL_MIPTREE_TILING_NONE, }; -bool -intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw, - struct intel_mipmap_tree *mt); - void intel_get_non_msrt_mcs_alignment(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned *width_px, unsigned *height); - +bool +intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling); +bool +intel_miptree_is_fast_clear_capable(struct brw_context *brw, + struct intel_mipmap_tree *mt); bool intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt); From c4aa041a611dfeb0a880c2173cb35c9c08dc79ca Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 14 May 2015 09:30:02 -0700 Subject: [PATCH 595/834] i965/gen8: Correct HALIGN for AUX surfaces This restriction was attempted in this commit: commit 47053464630888f819ef8cc44278f1a1220159b9 Author: Anuj Phogat Date: Fri Feb 13 11:21:21 2015 -0800 i965/gen8: Use HALIGN_16 if MCS is enabled for non-MSRT However, the commit itself doesn't achieve the desired goal as determined by the asserts which the next patch adds. mcs_mt is NULL (never set) we're in the process of allocating the mcs_mt miptree when we get to this function. 
I didn't check, but perhaps this would work with blorp, however, meta clears allocate the miptree structure (which AFAICT needs the alignment also) way before it allocates using meta clears where the renderbuffer is allocated way before the aux buffer. The restriction is referenced in a few places, but the most concise one [IMO] from the spec is for Gen9. Gen8 loosens the restriction in that it only requires this for non-msrt surface. When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN 16 must be used. With the code before the miptree layout flag rework (patches preceding this), accomplishing this workaround is very difficult. v2: bugfix: Don't set HALIGN16 for gens before 8 (Chad) v3: non-trivial rebase Signed-off-by: Ben Widawsky Cc: Neil Roberts Reviewed-by: Jordan Justen Reviewed-by: Anuj Phogat Reviewed-by: Chad Versace --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 13 ++++++++----- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 15 +++++++++++++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 1 + 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 52f37a19f68..1e7d8a103db 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -116,8 +116,12 @@ tr_mode_horizontal_texture_alignment(const struct brw_context *brw, static unsigned int intel_horizontal_texture_alignment_unit(struct brw_context *brw, - struct intel_mipmap_tree *mt) + struct intel_mipmap_tree *mt, + uint32_t layout_flags) { + if (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) + return 16; + /** * From the "Alignment Unit Size" section of various specs, namely: * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4 @@ -172,9 +176,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw, if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16) return 8; - if (brw->gen == 8 && mt->mcs_mt && mt->num_samples <= 
1) - return 16; - return 4; } @@ -792,6 +793,7 @@ brw_miptree_layout(struct brw_context *brw, */ mt->align_w = 64; mt->align_h = 64; + assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0); } else { /* Depth uses Y tiling, so we force need Y tiling alignment for the * ALL_SLICES_AT_EACH_LOD miptree layout. @@ -800,7 +802,8 @@ brw_miptree_layout(struct brw_context *brw, mt->align_h = 32; } } else { - mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt); + mt->align_w = + intel_horizontal_texture_alignment_unit(brw, mt, layout_flags); mt->align_h = intel_vertical_texture_alignment_unit(brw, mt); } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 2d8ace1476c..f218a2a196a 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -489,6 +489,11 @@ intel_miptree_create_layout(struct brw_context *brw, if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD) mt->array_layout = ALL_SLICES_AT_EACH_LOD; + /* Use HALIGN_16 if MCS is enabled for non-MSRT */ + if (brw->gen >= 8 && num_samples < 2 && + intel_miptree_is_fast_clear_capable(brw, mt)) + layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; + brw_miptree_layout(brw, mt, requested, layout_flags); if (mt->disable_aux_buffers) @@ -626,6 +631,7 @@ intel_miptree_create(struct brw_context *brw, if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) { + assert(mt->num_samples > 1); if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) { intel_miptree_release(&mt); return NULL; @@ -638,8 +644,10 @@ intel_miptree_create(struct brw_context *brw, * clear actually occurs. 
*/ if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) && - intel_miptree_is_fast_clear_capable(brw, mt)) + intel_miptree_is_fast_clear_capable(brw, mt)) { mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; + assert(brw->gen < 8 || mt->align_w == 16 || num_samples <= 1); + } return mt; } @@ -1357,6 +1365,9 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, unsigned mcs_height = ALIGN(mt->logical_height0, height_divisor) / height_divisor; assert(mt->logical_depth0 == 1); + uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; + if (brw->gen >= 8) + layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; mt->mcs_mt = intel_miptree_create(brw, mt->target, format, @@ -1367,7 +1378,7 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, mt->logical_depth0, 0 /* num_samples */, INTEL_MIPTREE_TILING_Y, - MIPTREE_LAYOUT_ACCELERATED_UPLOAD); + layout_flags); return mt->mcs_mt; } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 64fa191616c..bde6daa4e2d 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -540,6 +540,7 @@ enum { MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD = 1 << 1, MIPTREE_LAYOUT_FOR_BO = 1 << 2, MIPTREE_LAYOUT_DISABLE_AUX = 1 << 3, + MIPTREE_LAYOUT_FORCE_HALIGN16 = 1 << 4, }; struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw, From a2421623db9b900d2ab0026539e8f7f6294475ea Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 22 May 2015 15:57:37 -0700 Subject: [PATCH 596/834] i965/gen9: Set HALIGN_16 for all aux buffers Just like the previous patch, but for the GEN9 constraints. v2: bugfix: Gen9 HALIGN was being set for all miptree buffers (Chad). To address this, move the check to where the gen8 check is, and do the appropriate conditional there. v3: Remove stray whitespace introduced in v2 (Chad) Rework comment to show AUX_CCS and AUX_MCS specifically. Remove misworded part about gen7 (Chad). 
Signed-off-by: Ben Widawsky Reviewed-by: Anuj Phogat (v1) Reviewed-by: Jordan Justen (v1) Reviewed-by: Chad Versace --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index f218a2a196a..8addcc5010c 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -489,10 +489,26 @@ intel_miptree_create_layout(struct brw_context *brw, if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD) mt->array_layout = ALL_SLICES_AT_EACH_LOD; - /* Use HALIGN_16 if MCS is enabled for non-MSRT */ - if (brw->gen >= 8 && num_samples < 2 && - intel_miptree_is_fast_clear_capable(brw, mt)) + /* + * Obey HALIGN_16 constraints for Gen8 and Gen9 buffers which are + * multisampled or have an AUX buffer attached to it. + * + * GEN | MSRT | AUX_CCS_* or AUX_MCS + * ------------------------------------------- + * 9 | HALIGN_16 | HALIGN_16 + * 8 | HALIGN_ANY | HALIGN_16 + * 7 | ? | ? + * 6 | ? | ? + */ + if (intel_miptree_is_fast_clear_capable(brw, mt)) { + if (brw->gen >= 9 || (brw->gen == 8 && num_samples == 1)) + layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; + } else if (brw->gen >= 9 && num_samples > 1) { layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; + } else { + /* For now, nothing else has this requirement */ + assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0); + } brw_miptree_layout(brw, mt, requested, layout_flags); From 935f1f60da71df07aa45a3da92fa764a1830e0fb Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 14 May 2015 09:28:37 -0700 Subject: [PATCH 597/834] i965/gen8+: Add aux buffer alignment assertions This helped find the incorrect HALIGN values from the previous patches. v2: Add PRM references for assertions (Chad) v3: Remove duplicated part of commit message, assert num_samples > 1, instead of num_samples > 0. 
(Chad) Signed-off-by: Ben Widawsky Reviewed-by: Chad Versace Reviewed-by: Anuj Phogat Reviewed-by: Jordan Justen --- .../drivers/dri/i965/gen8_surface_state.c | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 672fc70a6f3..6b2463a2c63 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -178,6 +178,17 @@ gen8_emit_texture_surface_state(struct brw_context *brw, if (mt->mcs_mt) { aux_mt = mt->mcs_mt; aux_mode = GEN8_SURFACE_AUX_MODE_MCS; + + /* + * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE): + * "When MCS is enabled for non-MSRT, HALIGN_16 must be used" + * + * From the hardware spec for GEN9: + * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN + * 16 must be used." + */ + assert(brw->gen < 9 || mt->align_w == 16); + assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16); } uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index); @@ -391,6 +402,17 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, if (mt->mcs_mt) { aux_mt = mt->mcs_mt; aux_mode = GEN8_SURFACE_AUX_MODE_MCS; + + /* + * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE): + * "When MCS is enabled for non-MSRT, HALIGN_16 must be used" + * + * From the hardware spec for GEN9: + * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN + * 16 must be used." 
+ */ + assert(brw->gen < 9 || mt->align_w == 16); + assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16); } uint32_t *surf = allocate_surface_state(brw, &offset, surf_index); From faf7670ee86253cb7bf9422bf7937a0a63f4956f Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Wed, 10 Jun 2015 18:35:08 +1000 Subject: [PATCH 598/834] glsl: fix compile error message Reviewed-by: Anuj Phogat --- src/glsl/ast_to_hir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index a7811af5d0f..a05efe00470 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -3686,7 +3686,7 @@ ast_declarator_list::hir(exec_list *instructions, if (check_type->is_record() || check_type->is_matrix()) _mesa_glsl_error(&loc, state, "fragment shader output " - "cannot have struct or array type"); + "cannot have struct or matrix type"); switch (check_type->base_type) { case GLSL_TYPE_UINT: case GLSL_TYPE_INT: From 3e7412233739c882548f50fe01d9f6c5f0dd4bbb Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 27 May 2015 16:51:43 -0700 Subject: [PATCH 599/834] gallivm: Only build lp_profile() body when PROFILE is defined The only use of lp_profile() is wrapped in #if defined(PROFILE), so there is no reason to build it unless this macro is defined. 
Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 9a85248018c..405e6486f7a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -246,7 +246,7 @@ lp_disassemble(LLVMValueRef func, const void *code) { extern "C" void lp_profile(LLVMValueRef func, const void *code) { -#if defined(__linux__) && (defined(DEBUG) || defined(PROFILE)) +#if defined(__linux__) && defined(PROFILE) static boolean first_time = TRUE; static FILE *perf_map_file = NULL; static int perf_asm_fd = -1; From 4d35eef326e49cc8da50879d30a1c5088d4775e1 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 11 Jun 2015 15:42:25 +0000 Subject: [PATCH 600/834] radeon/llvm: Handle LLVM backend rename from R600 to AMDGPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák --- configure.ac | 13 ++++++++----- src/gallium/drivers/radeon/radeon_llvm_emit.c | 8 ++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index be0cd7dd7d9..34d1ac988f6 100644 --- a/configure.ac +++ b/configure.ac @@ -2048,16 +2048,19 @@ require_egl_drm() { } radeon_llvm_check() { + if test ${LLVM_VERSION_INT} -lt 307; then + amdgpu_llvm_target_name='r600' + else + amdgpu_llvm_target_name='amdgpu' + fi if test "x$enable_gallium_llvm" != "xyes"; then AC_MSG_ERROR([--enable-gallium-llvm is required when building $1]) fi llvm_check_version_for "3" "4" "2" $1 - if test true && $LLVM_CONFIG --targets-built | grep -qvw 'R600' ; then - AC_MSG_ERROR([LLVM R600 Target not enabled. 
You can enable it when building the LLVM - sources with the --enable-experimental-targets=R600 - configure flag]) + if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then + AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.]) fi - LLVM_COMPONENTS="${LLVM_COMPONENTS} r600 bitreader ipo" + LLVM_COMPONENTS="${LLVM_COMPONENTS} $amdgpu_llvm_target_name bitreader ipo" NEED_RADEON_LLVM=yes if test "x$have_libelf" != xyes; then AC_MSG_ERROR([$1 requires libelf when using llvm]) diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 624077c7465..25580b6bd4c 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -86,10 +86,18 @@ static void init_r600_target() { static unsigned initialized = 0; if (!initialized) { +#if HAVE_LLVM < 0x0307 LLVMInitializeR600TargetInfo(); LLVMInitializeR600Target(); LLVMInitializeR600TargetMC(); LLVMInitializeR600AsmPrinter(); +#else + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTarget(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUAsmPrinter(); + +#endif initialized = 1; } } From ab7229b9b6b160e805d14d600a432e76a5e88ef8 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 22 May 2015 13:49:20 +0800 Subject: [PATCH 601/834] ilo: assert core objects are zero-initialized Core objects are usually embedded inside calloc()'ed objects and we expect them to be zero-initialized. 
--- src/gallium/drivers/ilo/core/ilo_buffer.h | 3 +++ src/gallium/drivers/ilo/core/ilo_builder.c | 2 +- src/gallium/drivers/ilo/core/ilo_debug.h | 17 +++++++++++++++++ src/gallium/drivers/ilo/core/ilo_dev.c | 2 ++ src/gallium/drivers/ilo/core/ilo_fence.h | 3 ++- src/gallium/drivers/ilo/core/ilo_image.c | 4 ++++ 6 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_buffer.h b/src/gallium/drivers/ilo/core/ilo_buffer.h index 50f97d10bd7..13e5ce270d6 100644 --- a/src/gallium/drivers/ilo/core/ilo_buffer.h +++ b/src/gallium/drivers/ilo/core/ilo_buffer.h @@ -31,6 +31,7 @@ #include "intel_winsys.h" #include "ilo_core.h" +#include "ilo_debug.h" #include "ilo_dev.h" struct ilo_buffer { @@ -43,6 +44,8 @@ static inline void ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev, unsigned size, uint32_t bind, uint32_t flags) { + assert(ilo_is_zeroed(buf, sizeof(*buf))); + buf->bo_size = size; /* diff --git a/src/gallium/drivers/ilo/core/ilo_builder.c b/src/gallium/drivers/ilo/core/ilo_builder.c index 3c5eef9bcbc..4e05a3aca1e 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder.c +++ b/src/gallium/drivers/ilo/core/ilo_builder.c @@ -333,7 +333,7 @@ ilo_builder_init(struct ilo_builder *builder, { int i; - memset(builder, 0, sizeof(*builder)); + assert(ilo_is_zeroed(builder, sizeof(*builder))); builder->dev = dev; builder->winsys = winsys; diff --git a/src/gallium/drivers/ilo/core/ilo_debug.h b/src/gallium/drivers/ilo/core/ilo_debug.h index d9c460498ff..9833233d796 100644 --- a/src/gallium/drivers/ilo/core/ilo_debug.h +++ b/src/gallium/drivers/ilo/core/ilo_debug.h @@ -100,4 +100,21 @@ ilo_warn(const char *format, ...) 
#endif } +static inline bool +ilo_is_zeroed(const void *ptr, size_t size) +{ +#ifdef DEBUG + size_t i; + + for (i = 0; i < size; i++) { + if (((const char *) ptr)[i] != 0) + return false; + } + + return true; +#else + return true; +#endif +} + #endif /* ILO_DEBUG_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_dev.c b/src/gallium/drivers/ilo/core/ilo_dev.c index 7a774fa1591..c2cdad72d1a 100644 --- a/src/gallium/drivers/ilo/core/ilo_dev.c +++ b/src/gallium/drivers/ilo/core/ilo_dev.c @@ -40,6 +40,8 @@ ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys) { const struct intel_winsys_info *info; + assert(ilo_is_zeroed(dev, sizeof(*dev))); + info = intel_winsys_get_info(winsys); dev->winsys = winsys; diff --git a/src/gallium/drivers/ilo/core/ilo_fence.h b/src/gallium/drivers/ilo/core/ilo_fence.h index 00d555aa95b..b0b3c061dbc 100644 --- a/src/gallium/drivers/ilo/core/ilo_fence.h +++ b/src/gallium/drivers/ilo/core/ilo_fence.h @@ -31,6 +31,7 @@ #include "intel_winsys.h" #include "ilo_core.h" +#include "ilo_debug.h" #include "ilo_dev.h" struct ilo_fence { @@ -40,7 +41,7 @@ struct ilo_fence { static inline void ilo_fence_init(struct ilo_fence *fence, const struct ilo_dev *dev) { - /* no-op */ + assert(ilo_is_zeroed(fence, sizeof(*fence))); } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c index 22c8ef2620a..240595992bf 100644 --- a/src/gallium/drivers/ilo/core/ilo_image.c +++ b/src/gallium/drivers/ilo/core/ilo_image.c @@ -1386,6 +1386,8 @@ void ilo_image_init(struct ilo_image *img, struct ilo_image_params params; bool transfer_only; + assert(ilo_is_zeroed(img, sizeof(*img))); + /* use transfer layout when the texture is never bound to GPU */ transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_TRANSFER_READ)); @@ -1411,6 +1413,8 @@ ilo_image_init_for_imported(struct ilo_image *img, { struct ilo_image_params params; + assert(ilo_is_zeroed(img, sizeof(*img))); + if ((tiling ==
GEN6_TILING_X && bo_stride % 512) || (tiling == GEN6_TILING_Y && bo_stride % 128) || (tiling == GEN8_TILING_W && bo_stride % 64)) From 1885ac490834e70d831b5b4a287c272b4148761c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 21 May 2015 16:30:03 +0800 Subject: [PATCH 602/834] ilo: avoid resource owning in core It is up to the users whether to reference count the BOs or not. --- src/gallium/drivers/ilo/core/ilo_buffer.h | 14 +---- src/gallium/drivers/ilo/core/ilo_dev.c | 9 +-- src/gallium/drivers/ilo/core/ilo_dev.h | 3 - src/gallium/drivers/ilo/core/ilo_fence.h | 74 ----------------------- src/gallium/drivers/ilo/core/ilo_image.h | 27 ++------- src/gallium/drivers/ilo/ilo_resource.c | 20 +++--- src/gallium/drivers/ilo/ilo_screen.c | 22 +++---- 7 files changed, 27 insertions(+), 142 deletions(-) delete mode 100644 src/gallium/drivers/ilo/core/ilo_fence.h diff --git a/src/gallium/drivers/ilo/core/ilo_buffer.h b/src/gallium/drivers/ilo/core/ilo_buffer.h index 13e5ce270d6..99c7b014736 100644 --- a/src/gallium/drivers/ilo/core/ilo_buffer.h +++ b/src/gallium/drivers/ilo/core/ilo_buffer.h @@ -37,6 +37,7 @@ struct ilo_buffer { unsigned bo_size; + /* managed by users */ struct intel_bo *bo; }; @@ -77,17 +78,4 @@ ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev, } } -static inline void -ilo_buffer_cleanup(struct ilo_buffer *buf) -{ - intel_bo_unref(buf->bo); -} - -static inline void -ilo_buffer_set_bo(struct ilo_buffer *buf, struct intel_bo *bo) -{ - intel_bo_unref(buf->bo); - buf->bo = intel_bo_ref(bo); -} - #endif /* ILO_BUFFER_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_dev.c b/src/gallium/drivers/ilo/core/ilo_dev.c index c2cdad72d1a..925322abba4 100644 --- a/src/gallium/drivers/ilo/core/ilo_dev.c +++ b/src/gallium/drivers/ilo/core/ilo_dev.c @@ -32,8 +32,7 @@ #include "ilo_dev.h" /** - * Initialize the \p dev from \p winsys. \p winsys is considered owned by \p - * dev and will be destroyed in \p ilo_dev_cleanup(). 
+ * Initialize the \p dev from \p winsys. */ bool ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys) @@ -180,9 +179,3 @@ ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys) return true; } - -void -ilo_dev_cleanup(struct ilo_dev *dev) -{ - intel_winsys_destroy(dev->winsys); -} diff --git a/src/gallium/drivers/ilo/core/ilo_dev.h b/src/gallium/drivers/ilo/core/ilo_dev.h index 4eb5d59dc86..a9f9b176e16 100644 --- a/src/gallium/drivers/ilo/core/ilo_dev.h +++ b/src/gallium/drivers/ilo/core/ilo_dev.h @@ -63,9 +63,6 @@ struct ilo_dev { bool ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys); -void -ilo_dev_cleanup(struct ilo_dev *dev); - static inline int ilo_dev_gen(const struct ilo_dev *dev) { diff --git a/src/gallium/drivers/ilo/core/ilo_fence.h b/src/gallium/drivers/ilo/core/ilo_fence.h deleted file mode 100644 index b0b3c061dbc..00000000000 --- a/src/gallium/drivers/ilo/core/ilo_fence.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2012-2013 LunarG, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Chia-I Wu - */ - -#ifndef ILO_FENCE_H -#define ILO_FENCE_H - -#include "intel_winsys.h" - -#include "ilo_core.h" -#include "ilo_debug.h" -#include "ilo_dev.h" - -struct ilo_fence { - struct intel_bo *seq_bo; -}; - -static inline void -ilo_fence_init(struct ilo_fence *fence, const struct ilo_dev *dev) -{ - assert(ilo_is_zeroed(fence, sizeof(*fence))); -} - -static inline void -ilo_fence_cleanup(struct ilo_fence *fence) -{ - intel_bo_unref(fence->seq_bo); -} - -/** - * Set the sequence bo for waiting. The fence is considered signaled when - * there is no sequence bo. - */ -static inline void -ilo_fence_set_seq_bo(struct ilo_fence *fence, struct intel_bo *seq_bo) -{ - intel_bo_unref(fence->seq_bo); - fence->seq_bo = intel_bo_ref(seq_bo); -} - -/** - * Wait for the fence to be signaled or until \p timeout nanoseconds has - * passed. It will wait indefinitely when \p timeout is negative. 
- */ -static inline bool -ilo_fence_wait(struct ilo_fence *fence, int64_t timeout) -{ - return (!fence->seq_bo || intel_bo_wait(fence->seq_bo, timeout) == 0); -} - -#endif /* ILO_FENCE_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h index 4956bdae2ee..1354dd4e986 100644 --- a/src/gallium/drivers/ilo/core/ilo_image.h +++ b/src/gallium/drivers/ilo/core/ilo_image.h @@ -125,8 +125,6 @@ struct ilo_image { bool scanout; - struct intel_bo *bo; - struct { enum ilo_image_aux_type type; @@ -140,8 +138,12 @@ struct ilo_image { unsigned bo_stride; unsigned bo_height; + /* managed by users */ struct intel_bo *bo; } aux; + + /* managed by users */ + struct intel_bo *bo; }; struct pipe_resource; @@ -158,27 +160,6 @@ ilo_image_init_for_imported(struct ilo_image *img, enum gen_surface_tiling tiling, unsigned bo_stride); -static inline void -ilo_image_cleanup(struct ilo_image *img) -{ - intel_bo_unref(img->bo); - intel_bo_unref(img->aux.bo); -} - -static inline void -ilo_image_set_bo(struct ilo_image *img, struct intel_bo *bo) -{ - intel_bo_unref(img->bo); - img->bo = intel_bo_ref(bo); -} - -static inline void -ilo_image_set_aux_bo(struct ilo_image *img, struct intel_bo *bo) -{ - intel_bo_unref(img->aux.bo); - img->aux.bo = intel_bo_ref(bo); -} - static inline bool ilo_image_can_enable_aux(const struct ilo_image *img, unsigned level) { diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c index ad4852278d0..91e4d63fc8d 100644 --- a/src/gallium/drivers/ilo/ilo_resource.c +++ b/src/gallium/drivers/ilo/ilo_resource.c @@ -178,8 +178,8 @@ tex_create_bo(struct ilo_texture *tex) if (!bo) return false; - ilo_image_set_bo(&tex->image, bo); - intel_bo_unref(bo); + intel_bo_unref(tex->image.bo); + tex->image.bo = bo; return true; } @@ -223,7 +223,7 @@ tex_create_hiz(struct ilo_texture *tex) if (!bo) return false; - ilo_image_set_aux_bo(&tex->image, bo); + tex->image.aux.bo = bo; if (tex->imported) { 
unsigned lv; @@ -256,7 +256,7 @@ tex_create_mcs(struct ilo_texture *tex) if (!bo) return false; - ilo_image_set_aux_bo(&tex->image, bo); + tex->image.aux.bo = bo; return true; } @@ -267,7 +267,8 @@ tex_destroy(struct ilo_texture *tex) if (tex->separate_s8) tex_destroy(tex->separate_s8); - ilo_image_cleanup(&tex->image); + intel_bo_unref(tex->image.bo); + intel_bo_unref(tex->image.aux.bo); tex_free_slices(tex); FREE(tex); @@ -328,8 +329,7 @@ tex_import_handle(struct ilo_texture *tex, return false; } - ilo_image_set_bo(&tex->image, bo); - intel_bo_unref(bo); + tex->image.bo = bo; tex->imported = true; @@ -427,8 +427,8 @@ buf_create_bo(struct ilo_buffer_resource *buf) if (!bo) return false; - ilo_buffer_set_bo(&buf->buffer, bo); - intel_bo_unref(bo); + intel_bo_unref(buf->buffer.bo); + buf->buffer.bo = bo; return true; } @@ -436,7 +436,7 @@ buf_create_bo(struct ilo_buffer_resource *buf) static void buf_destroy(struct ilo_buffer_resource *buf) { - ilo_buffer_cleanup(&buf->buffer); + intel_bo_unref(buf->buffer.bo); FREE(buf); } diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index b0fed730512..80e01c7bd1d 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -31,7 +31,6 @@ #include "vl/vl_decoder.h" #include "vl/vl_video_buffer.h" #include "genhw/genhw.h" /* for GEN6_REG_TIMESTAMP */ -#include "core/ilo_fence.h" #include "core/ilo_format.h" #include "core/intel_winsys.h" @@ -43,8 +42,7 @@ struct pipe_fence_handle { struct pipe_reference reference; - - struct ilo_fence fence; + struct intel_bo *seqno_bo; }; static float @@ -642,7 +640,7 @@ ilo_screen_fence_reference(struct pipe_screen *screen, STATIC_ASSERT(&((struct pipe_fence_handle *) NULL)->reference == NULL); if (pipe_reference(&old->reference, &fence->reference)) { - ilo_fence_cleanup(&old->fence); + intel_bo_unref(old->seqno_bo); FREE(old); } } @@ -655,10 +653,14 @@ ilo_screen_fence_finish(struct pipe_screen *screen, const int64_t 
wait_timeout = (timeout > INT64_MAX) ? -1 : timeout; bool signaled; - signaled = ilo_fence_wait(&fence->fence, wait_timeout); + signaled = (!fence->seqno_bo || + intel_bo_wait(fence->seqno_bo, wait_timeout) == 0); + /* XXX not thread safe */ - if (signaled) - ilo_fence_set_seq_bo(&fence->fence, NULL); + if (signaled && fence->seqno_bo) { + intel_bo_unref(fence->seqno_bo); + fence->seqno_bo = NULL; + } return signaled; } @@ -677,7 +679,6 @@ ilo_screen_fence_signalled(struct pipe_screen *screen, struct pipe_fence_handle * ilo_screen_fence_create(struct pipe_screen *screen, struct intel_bo *bo) { - struct ilo_screen *is = ilo_screen(screen); struct pipe_fence_handle *fence; fence = CALLOC_STRUCT(pipe_fence_handle); @@ -686,8 +687,7 @@ ilo_screen_fence_create(struct pipe_screen *screen, struct intel_bo *bo) pipe_reference_init(&fence->reference, 1); - ilo_fence_init(&fence->fence, &is->dev); - ilo_fence_set_seq_bo(&fence->fence, bo); + fence->seqno_bo = intel_bo_ref(bo); return fence; } @@ -697,7 +697,7 @@ ilo_screen_destroy(struct pipe_screen *screen) { struct ilo_screen *is = ilo_screen(screen); - ilo_dev_cleanup(&is->dev); + intel_winsys_destroy(is->dev.winsys); FREE(is); } From 9da9cf729ff74684902cbb4b53b5cccd442df28e Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 15 May 2015 10:39:05 +0800 Subject: [PATCH 603/834] ilo: fix "Render Cache Read Write Mode" It needs to be set to R/W only when using certain messages via DP render cache. Since we only use RT writes with the render cache, we never need to set it.
--- .../drivers/ilo/core/ilo_builder_3d_top.h | 2 +- src/gallium/drivers/ilo/core/ilo_state_3d.h | 2 +- .../drivers/ilo/core/ilo_state_3d_top.c | 22 +++++-------------- src/gallium/drivers/ilo/ilo_render_surface.c | 4 ++-- src/gallium/drivers/ilo/ilo_state.c | 10 +++------ 5 files changed, 12 insertions(+), 28 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 05dbce7c905..f9275b64d0e 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -1711,7 +1711,7 @@ gen6_so_SURFACE_STATE(struct ilo_builder *builder, } ilo_gpe_init_view_surface_for_buffer(builder->dev, buf, bo_offset, - so->buffer_size, struct_size, elem_format, false, true, &surf); + so->buffer_size, struct_size, elem_format, false, &surf); return gen6_SURFACE_STATE(builder, &surf, false); } diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index fdce445f733..04d00756e09 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -380,7 +380,7 @@ ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, unsigned offset, unsigned size, unsigned struct_size, enum pipe_format elem_format, - bool is_rt, bool render_cache_rw, + bool is_rt, struct ilo_view_surface *surf); void diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c index c17957fb704..c498a8462a9 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c @@ -469,8 +469,7 @@ view_init_for_buffer_gen6(const struct ilo_dev *dev, unsigned offset, unsigned size, unsigned struct_size, enum pipe_format elem_format, - bool is_rt, bool render_cache_rw, - struct ilo_view_surface *surf) + bool is_rt, struct ilo_view_surface *surf) { const int elem_size = util_format_get_blocksize(elem_format); 
int width, height, depth, pitch; @@ -539,8 +538,6 @@ view_init_for_buffer_gen6(const struct ilo_dev *dev, dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT | surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT; - if (render_cache_rw) - dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; dw[1] = offset; @@ -691,9 +688,6 @@ view_init_for_image_gen6(const struct ilo_dev *dev, GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; } - if (is_rt) - dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; - dw[1] = 0; dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | @@ -795,8 +789,7 @@ view_init_for_buffer_gen7(const struct ilo_dev *dev, unsigned offset, unsigned size, unsigned struct_size, enum pipe_format elem_format, - bool is_rt, bool render_cache_rw, - struct ilo_view_surface *surf) + bool is_rt, struct ilo_view_surface *surf) { const bool typed = (elem_format != PIPE_FORMAT_NONE); const bool structured = (!typed && struct_size > 1); @@ -886,8 +879,6 @@ view_init_for_buffer_gen7(const struct ilo_dev *dev, dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT; - if (render_cache_rw) - dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; if (ilo_dev_gen(dev) >= ILO_GEN(8)) { dw[8] = offset; @@ -1117,9 +1108,6 @@ view_init_for_image_gen7(const struct ilo_dev *dev, dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL; } - if (is_rt) - dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; - if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; @@ -1213,15 +1201,15 @@ ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, unsigned offset, unsigned size, unsigned struct_size, enum pipe_format elem_format, - bool is_rt, bool render_cache_rw, + bool is_rt, struct ilo_view_surface *surf) { if (ilo_dev_gen(dev) >= ILO_GEN(7)) { view_init_for_buffer_gen7(dev, buf, offset, size, - struct_size, elem_format, is_rt, render_cache_rw, surf); + struct_size, elem_format, is_rt, surf); } else { view_init_for_buffer_gen6(dev, buf, offset, 
size, - struct_size, elem_format, is_rt, render_cache_rw, surf); + struct_size, elem_format, is_rt, surf); } /* do not increment reference count */ diff --git a/src/gallium/drivers/ilo/ilo_render_surface.c b/src/gallium/drivers/ilo/ilo_render_surface.c index b345dfb4fc4..52b1cb42c06 100644 --- a/src/gallium/drivers/ilo/ilo_render_surface.c +++ b/src/gallium/drivers/ilo/ilo_render_surface.c @@ -437,7 +437,7 @@ gen6_emit_launch_grid_surface_const(struct ilo_render *r, session->input->buffer_offset, session->input->buffer_size, 1, PIPE_FORMAT_NONE, - false, false, &view); + false, &view); assert(count == 1 && session->input->buffer); surface_state[base] = gen6_SURFACE_STATE(r->builder, &view, false); @@ -488,7 +488,7 @@ gen6_emit_launch_grid_surface_global(struct ilo_render *r, assert(bindings[i].resource->target == PIPE_BUFFER); ilo_gpe_init_view_surface_for_buffer(r->dev, buf, 0, buf->bo_size, - 1, PIPE_FORMAT_NONE, true, true, &view); + 1, PIPE_FORMAT_NONE, true, &view); surface_state[i] = gen6_SURFACE_STATE(r->builder, &view, true); } else { diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index b1bd49a0b6c..7627fcf7c96 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -112,7 +112,7 @@ finalize_cbuf_state(struct ilo_context *ilo, ilo_buffer(cbuf->cso[i].resource), offset, cbuf->cso[i].user_buffer_size, util_format_get_blocksize(elem_format), elem_format, - false, false, &cbuf->cso[i].surface); + false, &cbuf->cso[i].surface); ilo->state_vector.dirty |= ILO_DIRTY_CBUF; } @@ -683,7 +683,7 @@ ilo_set_constant_buffer(struct pipe_context *pipe, ilo_buffer(buf[i].buffer), buf[i].buffer_offset, buf[i].buffer_size, util_format_get_blocksize(elem_format), elem_format, - false, false, &cso->surface); + false, &cso->surface); cso->user_buffer = NULL; cso->user_buffer_size = 0; @@ -1007,7 +1007,7 @@ ilo_create_sampler_view(struct pipe_context *pipe, ilo_gpe_init_view_surface_for_buffer(dev, 
ilo_buffer(res), first_elem * elem_size, num_elems * elem_size, - elem_size, templ->format, false, false, &view->surface); + elem_size, templ->format, false, &view->surface); } else { struct ilo_texture *tex = ilo_texture(res); @@ -1066,10 +1066,6 @@ ilo_create_surface(struct pipe_context *pipe, /* relax this? */ assert(tex->base.target != PIPE_BUFFER); - /* - * classic i965 sets render_cache_rw for constant buffers and sol - * surfaces but not render buffers. Why? - */ ilo_gpe_init_view_surface_for_image(dev, &tex->image, tex->base.target, templ->format, templ->u.tex.level, 1, From f9d2bbe967fb1fbbe7102c0765f067b3155f5ca6 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sun, 17 May 2015 11:55:05 +0800 Subject: [PATCH 604/834] ilo: add pipe_texture_target to ilo_image Save the target in ilo_image instead of passing it around. --- src/gallium/drivers/ilo/core/ilo_image.c | 2 ++ src/gallium/drivers/ilo/core/ilo_image.h | 2 ++ src/gallium/drivers/ilo/core/ilo_state_3d.h | 2 -- .../drivers/ilo/core/ilo_state_3d_bottom.c | 8 +++----- src/gallium/drivers/ilo/core/ilo_state_3d_top.c | 17 +++++++---------- src/gallium/drivers/ilo/ilo_state.c | 10 ++++------ 6 files changed, 18 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c index 240595992bf..5365dd10bab 100644 --- a/src/gallium/drivers/ilo/core/ilo_image.c +++ b/src/gallium/drivers/ilo/core/ilo_image.c @@ -675,6 +675,7 @@ img_init_size_and_format(struct ilo_image *img, enum pipe_format format = templ->format; bool require_separate_stencil = false; + img->target = templ->target; img->width0 = templ->width0; img->height0 = templ->height0; img->depth0 = templ->depth0; @@ -1343,6 +1344,7 @@ img_init_for_transfer(struct ilo_image *img, img->aux.type = ILO_IMAGE_AUX_NONE; + img->target = templ->target; img->width0 = templ->width0; img->height0 = templ->height0; img->depth0 = templ->depth0; diff --git a/src/gallium/drivers/ilo/core/ilo_image.h 
b/src/gallium/drivers/ilo/core/ilo_image.h index 1354dd4e986..bef08bc3ca2 100644 --- a/src/gallium/drivers/ilo/core/ilo_image.h +++ b/src/gallium/drivers/ilo/core/ilo_image.h @@ -88,6 +88,8 @@ struct ilo_image_lod { * Texture layout. */ struct ilo_image { + enum pipe_texture_target target; + /* size, format, etc for programming hardware states */ unsigned width0; unsigned height0; diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index 04d00756e09..819a17d2681 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -386,7 +386,6 @@ ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, void ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev, const struct ilo_image *img, - enum pipe_texture_target target, enum pipe_format format, unsigned first_level, unsigned num_levels, @@ -399,7 +398,6 @@ void ilo_gpe_init_zs_surface(const struct ilo_dev *dev, const struct ilo_image *img, const struct ilo_image *s8_img, - enum pipe_texture_target target, enum pipe_format format, unsigned level, unsigned first_layer, unsigned num_layers, struct ilo_zs_surface *zs); diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c index 5a4c5dde7e7..22cd4eac82e 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c @@ -936,7 +936,6 @@ static void zs_init_info(const struct ilo_dev *dev, const struct ilo_image *img, const struct ilo_image *s8_img, - enum pipe_texture_target target, enum pipe_format format, unsigned level, unsigned first_layer, unsigned num_layers, struct ilo_zs_surface_info *info) @@ -947,7 +946,7 @@ zs_init_info(const struct ilo_dev *dev, memset(info, 0, sizeof(*info)); - info->surface_type = ilo_gpe_gen6_translate_texture(target); + info->surface_type = ilo_gpe_gen6_translate_texture(img->target); if (info->surface_type == 
GEN6_SURFTYPE_CUBE) { /* @@ -1086,7 +1085,7 @@ zs_init_info(const struct ilo_dev *dev, info->width = img->width0; info->height = img->height0; - info->depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers; + info->depth = (img->target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers; info->lod = level; info->first_layer = first_layer; @@ -1097,7 +1096,6 @@ void ilo_gpe_init_zs_surface(const struct ilo_dev *dev, const struct ilo_image *img, const struct ilo_image *s8_img, - enum pipe_texture_target target, enum pipe_format format, unsigned level, unsigned first_layer, unsigned num_layers, struct ilo_zs_surface *zs) @@ -1111,7 +1109,7 @@ ilo_gpe_init_zs_surface(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 6, 8); if (img) { - zs_init_info(dev, img, s8_img, target, format, + zs_init_info(dev, img, s8_img, format, level, first_layer, num_layers, &info); switch (img->sample_count) { diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c index c498a8462a9..4c06c91da90 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c @@ -554,7 +554,6 @@ view_init_for_buffer_gen6(const struct ilo_dev *dev, static void view_init_for_image_gen6(const struct ilo_dev *dev, const struct ilo_image *img, - enum pipe_texture_target target, enum pipe_format format, unsigned first_level, unsigned num_levels, @@ -569,7 +568,7 @@ view_init_for_image_gen6(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 6, 6); - surface_type = ilo_gpe_gen6_translate_texture(target); + surface_type = ilo_gpe_gen6_translate_texture(img->target); assert(surface_type != GEN6_SURFTYPE_BUFFER); if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil) @@ -583,7 +582,7 @@ view_init_for_image_gen6(const struct ilo_dev *dev, width = img->width0; height = img->height0; - depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers; + depth = (img->target == PIPE_TEXTURE_3D) ? 
img->depth0 : num_layers; pitch = img->bo_stride; if (surface_type == GEN6_SURFTYPE_CUBE) { @@ -910,7 +909,6 @@ view_init_for_buffer_gen7(const struct ilo_dev *dev, static void view_init_for_image_gen7(const struct ilo_dev *dev, const struct ilo_image *img, - enum pipe_texture_target target, enum pipe_format format, unsigned first_level, unsigned num_levels, @@ -925,7 +923,7 @@ view_init_for_image_gen7(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 7, 8); - surface_type = ilo_gpe_gen6_translate_texture(target); + surface_type = ilo_gpe_gen6_translate_texture(img->target); assert(surface_type != GEN6_SURFTYPE_BUFFER); if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil) @@ -939,7 +937,7 @@ view_init_for_image_gen7(const struct ilo_dev *dev, width = img->width0; height = img->height0; - depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers; + depth = (img->target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers; pitch = img->bo_stride; if (surface_type == GEN6_SURFTYPE_CUBE) { @@ -1045,7 +1043,7 @@ view_init_for_image_gen7(const struct ilo_dev *dev, * returns zero for the number of layers when this field is not set. 
*/ if (surface_type != GEN6_SURFTYPE_3D) { - switch (target) { + switch (img->target) { case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_CUBE_ARRAY: @@ -1220,7 +1218,6 @@ ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, void ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev, const struct ilo_image *img, - enum pipe_texture_target target, enum pipe_format format, unsigned first_level, unsigned num_levels, @@ -1230,11 +1227,11 @@ ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev, struct ilo_view_surface *surf) { if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - view_init_for_image_gen7(dev, img, target, format, + view_init_for_image_gen7(dev, img, format, first_level, num_levels, first_layer, num_layers, is_rt, surf); } else { - view_init_for_image_gen6(dev, img, target, format, + view_init_for_image_gen6(dev, img, format, first_level, num_levels, first_layer, num_layers, is_rt, surf); } diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 7627fcf7c96..3865e15bfb1 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -1019,8 +1019,7 @@ ilo_create_sampler_view(struct pipe_context *pipe, "not created for sampling\n"); } - ilo_gpe_init_view_surface_for_image(dev, &tex->image, - tex->base.target, templ->format, + ilo_gpe_init_view_surface_for_image(dev, &tex->image, templ->format, templ->u.tex.first_level, templ->u.tex.last_level - templ->u.tex.first_level + 1, templ->u.tex.first_layer, @@ -1066,8 +1065,7 @@ ilo_create_surface(struct pipe_context *pipe, /* relax this? 
*/ assert(tex->base.target != PIPE_BUFFER); - ilo_gpe_init_view_surface_for_image(dev, - &tex->image, tex->base.target, + ilo_gpe_init_view_surface_for_image(dev, &tex->image, templ->format, templ->u.tex.level, 1, templ->u.tex.first_layer, templ->u.tex.last_layer - templ->u.tex.first_layer + 1, @@ -1077,7 +1075,7 @@ ilo_create_surface(struct pipe_context *pipe, ilo_gpe_init_zs_surface(dev, &tex->image, (tex->separate_s8) ? &tex->separate_s8->image : NULL, - tex->base.target, templ->format, + templ->format, templ->u.tex.level, templ->u.tex.first_layer, templ->u.tex.last_layer - templ->u.tex.first_layer + 1, &surf->u.zs); @@ -1292,7 +1290,7 @@ ilo_state_vector_init(const struct ilo_dev *dev, { ilo_gpe_set_scissor_null(dev, &vec->scissor); - ilo_gpe_init_zs_surface(dev, NULL, NULL, PIPE_TEXTURE_2D, + ilo_gpe_init_zs_surface(dev, NULL, NULL, PIPE_FORMAT_NONE, 0, 0, 1, &vec->fb.null_zs); util_dynarray_init(&vec->global_binding.bindings); From f0de65cbc29b45fffbe4bf4e1ce299ddb8be9eda Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Tue, 26 May 2015 15:46:44 +0800 Subject: [PATCH 605/834] ilo: add array_size and level_count to ilo_image We will use them for bound checking. --- src/gallium/drivers/ilo/core/ilo_image.c | 4 ++++ src/gallium/drivers/ilo/core/ilo_image.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c index 5365dd10bab..cf6c17f28f7 100644 --- a/src/gallium/drivers/ilo/core/ilo_image.c +++ b/src/gallium/drivers/ilo/core/ilo_image.c @@ -679,6 +679,8 @@ img_init_size_and_format(struct ilo_image *img, img->width0 = templ->width0; img->height0 = templ->height0; img->depth0 = templ->depth0; + img->array_size = templ->array_size; + img->level_count = templ->last_level + 1; img->sample_count = (templ->nr_samples) ? 
templ->nr_samples : 1; /* @@ -1348,6 +1350,8 @@ img_init_for_transfer(struct ilo_image *img, img->width0 = templ->width0; img->height0 = templ->height0; img->depth0 = templ->depth0; + img->array_size = templ->array_size; + img->level_count = 1; img->sample_count = 1; img->format = templ->format; diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h index bef08bc3ca2..8307cb68f9c 100644 --- a/src/gallium/drivers/ilo/core/ilo_image.h +++ b/src/gallium/drivers/ilo/core/ilo_image.h @@ -94,6 +94,8 @@ struct ilo_image { unsigned width0; unsigned height0; unsigned depth0; + unsigned array_size; + unsigned level_count; unsigned sample_count; enum pipe_format format; bool separate_stencil; From 9cb0df4b50593e69f65b65704f5b64f3a12be9b5 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 22 May 2015 14:21:22 +0800 Subject: [PATCH 606/834] ilo: add ilo_image_disable_aux() When aux bo allocation fails, ilo_image_disable_aux() should be called to disable aux buffer. 
--- src/gallium/drivers/ilo/core/ilo_image.c | 19 +++++++++++++++++++ src/gallium/drivers/ilo/core/ilo_image.h | 5 ++++- src/gallium/drivers/ilo/ilo_resource.c | 12 +++++------- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c index cf6c17f28f7..631093273bf 100644 --- a/src/gallium/drivers/ilo/core/ilo_image.c +++ b/src/gallium/drivers/ilo/core/ilo_image.c @@ -1445,3 +1445,22 @@ ilo_image_init_for_imported(struct ilo_image *img, return true; } + +bool +ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev) +{ + /* HiZ is required for separate stencil on Gen6 */ + if (ilo_dev_gen(dev) == ILO_GEN(6) && + img->aux.type == ILO_IMAGE_AUX_HIZ && + img->separate_stencil) + return false; + + /* MCS is required for multisample images */ + if (img->aux.type == ILO_IMAGE_AUX_MCS && + img->sample_count > 1) + return false; + + img->aux.enables = 0x0; + + return true; +} diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h index 8307cb68f9c..af15e856028 100644 --- a/src/gallium/drivers/ilo/core/ilo_image.h +++ b/src/gallium/drivers/ilo/core/ilo_image.h @@ -164,10 +164,13 @@ ilo_image_init_for_imported(struct ilo_image *img, enum gen_surface_tiling tiling, unsigned bo_stride); +bool +ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev); + static inline bool ilo_image_can_enable_aux(const struct ilo_image *img, unsigned level) { - return (img->aux.bo && (img->aux.enables & (1 << level))); + return (img->aux.enables & (1 << level)); } /** diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c index 91e4d63fc8d..b6f5d26da5b 100644 --- a/src/gallium/drivers/ilo/ilo_resource.c +++ b/src/gallium/drivers/ilo/ilo_resource.c @@ -288,15 +288,13 @@ tex_alloc_bos(struct ilo_texture *tex) switch (tex->image.aux.type) { case ILO_IMAGE_AUX_HIZ: - if (!tex_create_hiz(tex)) { - /* 
Separate Stencil Buffer requires HiZ to be enabled */ - if (ilo_dev_gen(&is->dev) == ILO_GEN(6) && - tex->image.separate_stencil) - return false; - } + if (!tex_create_hiz(tex) && + !ilo_image_disable_aux(&tex->image, &is->dev)) + return false; break; case ILO_IMAGE_AUX_MCS: - if (!tex_create_mcs(tex)) + if (!tex_create_mcs(tex) && + !ilo_image_disable_aux(&tex->image, &is->dev)) return false; break; default: From 9af1fc590d90fdda65aa0cf145773480af52a4e5 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 9 May 2015 21:39:34 +0800 Subject: [PATCH 607/834] ilo: update genhw headers Generate these new enums enum gen_reorder_mode; enum gen_clip_mode; enum gen_front_winding; enum gen_fill_mode; enum gen_cull_mode; enum gen_pixel_location; enum gen_sample_count; enum gen_inputattr_select; enum gen_msrast_mode; enum gen_prefilter_op; Correct the type of GEN6_SAMPLER_DW0_BASE_LOD. Rename gen_logicop_function, gen_sampler_mip_filter, gen_sampler_map_filter, gen_sampler_aniso_ratio, and others. --- .../drivers/ilo/core/ilo_builder_3d_bottom.h | 64 ++-- .../drivers/ilo/core/ilo_builder_3d_top.h | 2 +- .../drivers/ilo/core/ilo_builder_decode.c | 8 +- .../drivers/ilo/core/ilo_state_3d_bottom.c | 104 +++--- src/gallium/drivers/ilo/genhw/gen_mi.xml.h | 3 + src/gallium/drivers/ilo/genhw/gen_regs.xml.h | 2 + .../drivers/ilo/genhw/gen_render_3d.xml.h | 298 +++++++++--------- .../ilo/genhw/gen_render_dynamic.xml.h | 80 +++-- .../ilo/genhw/gen_render_surface.xml.h | 7 +- src/gallium/drivers/ilo/genhw/genhw.h | 7 +- src/gallium/drivers/ilo/ilo_shader.c | 3 +- 11 files changed, 297 insertions(+), 281 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 16ec4afd15b..093cca12840 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -66,7 +66,7 @@ gen6_3DSTATE_CLIP(struct ilo_builder *builder, GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) 
dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE; - dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO | + dw3 |= GEN6_CLIP_DW3_FORCE_RTAINDEX_ZERO | (num_viewports - 1); ilo_builder_batch_pointer(builder, cmd_len, &dw); @@ -107,7 +107,7 @@ gen7_internal_3dstate_sf(struct ilo_builder *builder, if (!sf) { dw[1] = 0; - dw[2] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0; + dw[2] = (num_samples > 1) ? (GEN6_MSRASTMODE_ON_PATTERN << 8) : 0; dw[3] = 0; dw[4] = 0; dw[5] = 0; @@ -593,23 +593,23 @@ gen8_3DSTATE_WM_HZ_OP(struct ilo_builder *builder, uint32_t op, switch (sample_count) { case 0: case 1: - dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_1; + dw1 |= GEN6_NUMSAMPLES_1 << 13; break; case 2: - dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_2; + dw1 |= GEN8_NUMSAMPLES_2 << 13; break; case 4: - dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_4; + dw1 |= GEN6_NUMSAMPLES_4 << 13; break; case 8: - dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_8; + dw1 |= GEN7_NUMSAMPLES_8 << 13; break; case 16: - dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_16; + dw1 |= GEN8_NUMSAMPLES_16 << 13; break; default: assert(!"unsupported sample count"); - dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_1; + dw1 |= GEN6_NUMSAMPLES_1 << 13; break; } @@ -772,7 +772,7 @@ gen8_3DSTATE_PS_EXTRA(struct ilo_builder *builder, dw1 = cso->payload[3]; if (cc_may_kill) - dw1 |= GEN8_PSX_DW1_DISPATCH_ENABLE | GEN8_PSX_DW1_KILL_PIXEL; + dw1 |= GEN8_PSX_DW1_VALID | GEN8_PSX_DW1_KILL_PIXEL; if (per_sample) dw1 |= GEN8_PSX_DW1_PER_SAMPLE; @@ -866,34 +866,35 @@ gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, bool pixel_location_center) { const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3; + const enum gen_pixel_location pixloc = (pixel_location_center) ? + GEN6_PIXLOC_CENTER : GEN6_PIXLOC_UL_CORNER; uint32_t dw1, dw2, dw3, *dw; ILO_DEV_ASSERT(builder->dev, 6, 7.5); - dw1 = (pixel_location_center) ? 
GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : - GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER; + dw1 = pixloc << 4; switch (num_samples) { case 0: case 1: - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; + dw1 |= GEN6_NUMSAMPLES_1 << 1; dw2 = 0; dw3 = 0; break; case 4: - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4; + dw1 |= GEN6_NUMSAMPLES_4 << 1; dw2 = pattern[0]; dw3 = 0; break; case 8: assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7)); - dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8; + dw1 |= GEN7_NUMSAMPLES_8 << 1; dw2 = pattern[0]; dw3 = pattern[1]; break; default: assert(!"unsupported sample count"); - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; + dw1 |= GEN6_NUMSAMPLES_1 << 1; dw2 = 0; dw3 = 0; break; @@ -914,33 +915,34 @@ gen8_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, bool pixel_location_center) { const uint8_t cmd_len = 2; + const enum gen_pixel_location pixloc = (pixel_location_center) ? + GEN6_PIXLOC_CENTER : GEN6_PIXLOC_UL_CORNER; uint32_t dw1, *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); - dw1 = (pixel_location_center) ? 
GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : - GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER; + dw1 = pixloc << 4; switch (num_samples) { case 0: case 1: - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; + dw1 |= GEN6_NUMSAMPLES_1 << 1; break; case 2: - dw1 |= GEN8_MULTISAMPLE_DW1_NUMSAMPLES_2; + dw1 |= GEN8_NUMSAMPLES_2 << 1; break; case 4: - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4; + dw1 |= GEN6_NUMSAMPLES_4 << 1; break; case 8: - dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8; + dw1 |= GEN7_NUMSAMPLES_8 << 1; break; case 16: - dw1 |= GEN8_MULTISAMPLE_DW1_NUMSAMPLES_16; + dw1 |= GEN8_NUMSAMPLES_16 << 1; break; default: assert(!"unsupported sample count"); - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; + dw1 |= GEN6_NUMSAMPLES_1 << 1; break; } @@ -1732,10 +1734,10 @@ gen6_BLEND_STATE(struct ilo_builder *builder, if (caps->can_alpha_test) dw[1] |= dsa->dw_blend_alpha; } else { - dw[1] |= GEN6_RT_DW1_WRITE_DISABLE_A | - GEN6_RT_DW1_WRITE_DISABLE_R | - GEN6_RT_DW1_WRITE_DISABLE_G | - GEN6_RT_DW1_WRITE_DISABLE_B | + dw[1] |= GEN6_RT_DW1_WRITE_DISABLES_A | + GEN6_RT_DW1_WRITE_DISABLES_R | + GEN6_RT_DW1_WRITE_DISABLES_G | + GEN6_RT_DW1_WRITE_DISABLES_B | dsa->dw_blend_alpha; } @@ -1800,10 +1802,10 @@ gen8_BLEND_STATE(struct ilo_builder *builder, if (caps->can_logicop) dw[1] |= blend->dw_logicop; } else { - dw[0] |= GEN8_RT_DW0_WRITE_DISABLE_A | - GEN8_RT_DW0_WRITE_DISABLE_R | - GEN8_RT_DW0_WRITE_DISABLE_G | - GEN8_RT_DW0_WRITE_DISABLE_B; + dw[0] |= GEN8_RT_DW0_WRITE_DISABLES_A | + GEN8_RT_DW0_WRITE_DISABLES_R | + GEN8_RT_DW0_WRITE_DISABLES_G | + GEN8_RT_DW0_WRITE_DISABLES_B; } dw += 2; diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index f9275b64d0e..cfa0e441855 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -1051,7 +1051,7 @@ gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder, GEN7_SO_DW1_STATISTICS; /* API_OPENGL */ if (true) - dw[1] |= 
GEN7_SO_DW1_REORDER_TRAILING; + dw[1] |= GEN7_REORDER_TRAILING << GEN7_SO_DW1_REORDER_MODE__SHIFT; if (ilo_dev_gen(builder->dev) < ILO_GEN(8)) dw[1] |= buf_mask << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT; diff --git a/src/gallium/drivers/ilo/core/ilo_builder_decode.c b/src/gallium/drivers/ilo/core/ilo_builder_decode.c index cedaab1559d..c5a98c91204 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_decode.c +++ b/src/gallium/drivers/ilo/core/ilo_builder_decode.c @@ -319,7 +319,7 @@ writer_decode_color_calc(const struct ilo_builder *builder, "stencil ref %d, bf stencil ref %d\n", GEN_EXTRACT(dw, GEN6_CC_DW0_ALPHATEST) ? "FLOAT32" : "UNORM8", (bool) (dw & GEN6_CC_DW0_ROUND_DISABLE_DISABLE), - GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL0_REF), + GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL_REF), GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL1_REF)); writer_dw(builder, which, item->offset, 1, "CC\n"); @@ -347,13 +347,13 @@ writer_decode_depth_stencil(const struct ilo_builder *builder, dw = writer_dw(builder, which, item->offset, 0, "D_S"); ilo_printf("stencil %sable, func %d, write %sable\n", (dw & GEN6_ZS_DW0_STENCIL_TEST_ENABLE) ? "en" : "dis", - GEN_EXTRACT(dw, GEN6_ZS_DW0_STENCIL0_FUNC), + GEN_EXTRACT(dw, GEN6_ZS_DW0_STENCIL_FUNC), (dw & GEN6_ZS_DW0_STENCIL_WRITE_ENABLE) ? 
"en" : "dis"); dw = writer_dw(builder, which, item->offset, 1, "D_S"); ilo_printf("stencil test mask 0x%x, write mask 0x%x\n", - GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL0_VALUEMASK), - GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL0_WRITEMASK)); + GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL_TEST_MASK), + GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL_WRITE_MASK)); dw = writer_dw(builder, which, item->offset, 2, "D_S"); ilo_printf("depth test %sable, func %d, write %sable\n", diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c index 22cd4eac82e..9d472d93fdc 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c @@ -61,20 +61,20 @@ rasterizer_init_clip(const struct ilo_dev *dev, if (ilo_dev_gen(dev) < ILO_GEN(8)) { if (state->front_ccw) - dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW; + dw1 |= GEN6_FRONTWINDING_CCW << 20; switch (state->cull_face) { case PIPE_FACE_NONE: - dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE; + dw1 |= GEN6_CULLMODE_NONE << 16; break; case PIPE_FACE_FRONT: - dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT; + dw1 |= GEN6_CULLMODE_FRONT << 16; break; case PIPE_FACE_BACK: - dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK; + dw1 |= GEN6_CULLMODE_BACK << 16; break; case PIPE_FACE_FRONT_AND_BACK: - dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH; + dw1 |= GEN6_CULLMODE_BOTH << 16; break; } } @@ -83,7 +83,7 @@ rasterizer_init_clip(const struct ilo_dev *dev, dw2 = GEN6_CLIP_DW2_CLIP_ENABLE | GEN6_CLIP_DW2_XY_TEST_ENABLE | state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT | - GEN6_CLIP_DW2_CLIPMODE_NORMAL; + GEN6_CLIPMODE_NORMAL << 13; if (state->clip_halfz) dw2 |= GEN6_CLIP_DW2_APIMODE_D3D; @@ -160,7 +160,7 @@ rasterizer_init_sf_gen6(const struct ilo_dev *dev, * CLIP_STATE is clear." 
*/ dw1 = GEN7_SF_DW1_STATISTICS | - GEN7_SF_DW1_VIEWPORT_ENABLE; + GEN7_SF_DW1_VIEWPORT_TRANSFORM; /* XXX GEN6 path seems to work fine for GEN7 */ if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) { @@ -192,30 +192,30 @@ rasterizer_init_sf_gen6(const struct ilo_dev *dev, switch (state->fill_front) { case PIPE_POLYGON_MODE_FILL: - dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID; + dw1 |= GEN6_FILLMODE_SOLID << 5; break; case PIPE_POLYGON_MODE_LINE: - dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME; + dw1 |= GEN6_FILLMODE_WIREFRAME << 5; break; case PIPE_POLYGON_MODE_POINT: - dw1 |= GEN7_SF_DW1_FRONTFACE_POINT; + dw1 |= GEN6_FILLMODE_POINT << 5; break; } switch (state->fill_back) { case PIPE_POLYGON_MODE_FILL: - dw1 |= GEN7_SF_DW1_BACKFACE_SOLID; + dw1 |= GEN6_FILLMODE_SOLID << 3; break; case PIPE_POLYGON_MODE_LINE: - dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME; + dw1 |= GEN6_FILLMODE_WIREFRAME << 3; break; case PIPE_POLYGON_MODE_POINT: - dw1 |= GEN7_SF_DW1_BACKFACE_POINT; + dw1 |= GEN6_FILLMODE_POINT << 3; break; } if (state->front_ccw) - dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW; + dw1 |= GEN6_FRONTWINDING_CCW; dw2 = 0; @@ -239,16 +239,16 @@ rasterizer_init_sf_gen6(const struct ilo_dev *dev, switch (state->cull_face) { case PIPE_FACE_NONE: - dw2 |= GEN7_SF_DW2_CULLMODE_NONE; + dw2 |= GEN6_CULLMODE_NONE << 29; break; case PIPE_FACE_FRONT: - dw2 |= GEN7_SF_DW2_CULLMODE_FRONT; + dw2 |= GEN6_CULLMODE_FRONT << 29; break; case PIPE_FACE_BACK: - dw2 |= GEN7_SF_DW2_CULLMODE_BACK; + dw2 |= GEN6_CULLMODE_BACK << 29; break; case PIPE_FACE_FRONT_AND_BACK: - dw2 |= GEN7_SF_DW2_CULLMODE_BOTH; + dw2 |= GEN6_CULLMODE_BOTH << 29; break; } @@ -307,7 +307,7 @@ rasterizer_init_sf_gen6(const struct ilo_dev *dev, sf->payload[2] = dw3; if (state->multisample) { - sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN; + sf->dw_msaa = GEN6_MSRASTMODE_ON_PATTERN << 8; /* * From the Sandy Bridge PRM, volume 2 part 1, page 251: @@ -339,20 +339,20 @@ rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 8, 8); if 
(state->front_ccw) - dw |= GEN8_RASTER_DW1_FRONTWINDING_CCW; + dw |= GEN6_FRONTWINDING_CCW << 21; switch (state->cull_face) { case PIPE_FACE_NONE: - dw |= GEN8_RASTER_DW1_CULLMODE_NONE; + dw |= GEN6_CULLMODE_NONE << 16; break; case PIPE_FACE_FRONT: - dw |= GEN8_RASTER_DW1_CULLMODE_FRONT; + dw |= GEN6_CULLMODE_FRONT << 16; break; case PIPE_FACE_BACK: - dw |= GEN8_RASTER_DW1_CULLMODE_BACK; + dw |= GEN6_CULLMODE_BACK << 16; break; case PIPE_FACE_FRONT_AND_BACK: - dw |= GEN8_RASTER_DW1_CULLMODE_BOTH; + dw |= GEN6_CULLMODE_BOTH << 16; break; } @@ -371,25 +371,25 @@ rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev, switch (state->fill_front) { case PIPE_POLYGON_MODE_FILL: - dw |= GEN8_RASTER_DW1_FRONTFACE_SOLID; + dw |= GEN6_FILLMODE_SOLID << 5; break; case PIPE_POLYGON_MODE_LINE: - dw |= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME; + dw |= GEN6_FILLMODE_WIREFRAME << 5; break; case PIPE_POLYGON_MODE_POINT: - dw |= GEN8_RASTER_DW1_FRONTFACE_POINT; + dw |= GEN6_FILLMODE_POINT << 5; break; } switch (state->fill_back) { case PIPE_POLYGON_MODE_FILL: - dw |= GEN8_RASTER_DW1_BACKFACE_SOLID; + dw |= GEN6_FILLMODE_SOLID << 3; break; case PIPE_POLYGON_MODE_LINE: - dw |= GEN8_RASTER_DW1_BACKFACE_WIREFRAME; + dw |= GEN6_FILLMODE_WIREFRAME << 3; break; case PIPE_POLYGON_MODE_POINT: - dw |= GEN8_RASTER_DW1_BACKFACE_POINT; + dw |= GEN6_FILLMODE_POINT << 3; break; } @@ -429,7 +429,7 @@ rasterizer_init_sf_gen8(const struct ilo_dev *dev, point_width = CLAMP(point_width, 1, 2047); dw1 = GEN7_SF_DW1_STATISTICS | - GEN7_SF_DW1_VIEWPORT_ENABLE; + GEN7_SF_DW1_VIEWPORT_TRANSFORM; dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; if (state->line_smooth) @@ -497,15 +497,15 @@ rasterizer_init_wm_gen6(const struct ilo_dev *dev, * * is valid */ - STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 && + STATIC_ASSERT(GEN6_MSRASTMODE_OFF_PIXEL == 0 && GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0); - dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL; + dw6 = GEN6_ZW_INTERP_PIXEL << GEN6_WM_DW6_ZW_INTERP__SHIFT; if 
(state->bottom_edge_rule) dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT; wm->dw_msaa_rast = - (state->multisample) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN : 0; + (state->multisample) ? (GEN6_MSRASTMODE_ON_PATTERN << 1) : 0; wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL; STATIC_ASSERT(Elements(wm->payload) >= 2); @@ -530,9 +530,9 @@ rasterizer_init_wm_gen7(const struct ilo_dev *dev, * * is valid */ - STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 && + STATIC_ASSERT(GEN6_MSRASTMODE_OFF_PIXEL == 0 && GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0); - dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL | + dw1 = GEN6_ZW_INTERP_PIXEL << GEN7_WM_DW1_ZW_INTERP__SHIFT | GEN7_WM_DW1_AA_LINE_WIDTH_2_0; dw2 = 0; @@ -549,7 +549,7 @@ rasterizer_init_wm_gen7(const struct ilo_dev *dev, dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; wm->dw_msaa_rast = - (state->multisample) ? GEN7_WM_DW1_MSRASTMODE_ON_PATTERN : 0; + (state->multisample) ? GEN6_MSRASTMODE_ON_PATTERN : 0; wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL; STATIC_ASSERT(Elements(wm->payload) >= 2); @@ -565,7 +565,7 @@ rasterizer_get_wm_gen8(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 8, 8); - dw = GEN7_WM_DW1_ZW_INTERP_PIXEL | + dw = GEN6_ZW_INTERP_PIXEL << GEN7_WM_DW1_ZW_INTERP__SHIFT | GEN7_WM_DW1_AA_LINE_WIDTH_2_0; /* same value as in 3DSTATE_SF */ @@ -691,7 +691,7 @@ fs_init_cso_gen6(const struct ilo_dev *dev, dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT; dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | - GEN6_WM_DW6_PS_POSOFFSET_NONE | + GEN6_POSOFFSET_NONE << GEN6_WM_DW6_PS_POSOFFSET__SHIFT | interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT; STATIC_ASSERT(Elements(cso->payload) >= 4); @@ -752,7 +752,7 @@ fs_get_wm_gen7(const struct ilo_dev *dev, dw |= GEN7_WM_DW1_PS_KILL_PIXEL; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw |= GEN7_WM_DW1_PSCDEPTH_ON; + dw |= GEN7_PSCDEPTH_ON << GEN7_WM_DW1_PSCDEPTH__SHIFT; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) dw |= 
GEN7_WM_DW1_PS_USE_DEPTH; @@ -779,7 +779,7 @@ fs_init_cso_gen7(const struct ilo_dev *dev, dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - dw4 = GEN7_PS_DW4_POSOFFSET_NONE; + dw4 = GEN6_POSOFFSET_NONE << GEN7_PS_DW4_POSOFFSET__SHIFT; /* see brwCreateContext() */ switch (ilo_dev_gen(dev)) { @@ -823,12 +823,12 @@ fs_get_psx_gen8(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 8, 8); - dw = GEN8_PSX_DW1_DISPATCH_ENABLE; + dw = GEN8_PSX_DW1_VALID; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) dw |= GEN8_PSX_DW1_KILL_PIXEL; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw |= GEN8_PSX_DW1_PSCDEPTH_ON; + dw |= GEN7_PSCDEPTH_ON << GEN8_PSX_DW1_PSCDEPTH__SHIFT; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) dw |= GEN8_PSX_DW1_USE_DEPTH; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) @@ -868,7 +868,7 @@ fs_init_cso_gen8(const struct ilo_dev *dev, /* always 64? */ dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT | - GEN8_PS_DW6_POSOFFSET_NONE; + GEN6_POSOFFSET_NONE << GEN8_PS_DW6_POSOFFSET__SHIFT; if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE; @@ -1604,13 +1604,13 @@ blend_init_cso_gen6(const struct ilo_dev *dev, GEN6_RT_DW1_POST_BLEND_CLAMP; if (!(rt->colormask & PIPE_MASK_A)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_A; + cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLES_A; if (!(rt->colormask & PIPE_MASK_R)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_R; + cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLES_R; if (!(rt->colormask & PIPE_MASK_G)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_G; + cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLES_G; if (!(rt->colormask & PIPE_MASK_B)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_B; + cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLES_B; /* * From the Sandy Bridge PRM, volume 2 part 1, page 365: @@ -1649,13 +1649,13 @@ blend_init_cso_gen8(const 
struct ilo_dev *dev, GEN8_RT_DW1_POST_BLEND_CLAMP; if (!(rt->colormask & PIPE_MASK_A)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_A; + cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLES_A; if (!(rt->colormask & PIPE_MASK_R)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_R; + cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLES_R; if (!(rt->colormask & PIPE_MASK_G)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_G; + cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLES_G; if (!(rt->colormask & PIPE_MASK_B)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_B; + cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLES_B; if (state->logicop_enable) { cso->dw_blend = 0; diff --git a/src/gallium/drivers/ilo/genhw/gen_mi.xml.h b/src/gallium/drivers/ilo/genhw/gen_mi.xml.h index 24d726adcb3..5a0bb4f8d77 100644 --- a/src/gallium/drivers/ilo/genhw/gen_mi.xml.h +++ b/src/gallium/drivers/ilo/genhw/gen_mi.xml.h @@ -97,6 +97,9 @@ enum gen_mi_alu_operand { #define GEN6_MI_LENGTH__MASK 0x0000003f #define GEN6_MI_LENGTH__SHIFT 0 #define GEN6_MI_NOOP__SIZE 1 +#define GEN6_MI_NOOP_DW0_WRITE_NOPID (0x1 << 22) +#define GEN6_MI_NOOP_DW0_VALUE__MASK 0x003fffff +#define GEN6_MI_NOOP_DW0_VALUE__SHIFT 0 #define GEN75_MI_SET_PREDICATE__SIZE 1 #define GEN75_MI_SET_PREDICATE_DW0_PREDICATE__MASK 0x00000003 diff --git a/src/gallium/drivers/ilo/genhw/gen_regs.xml.h b/src/gallium/drivers/ilo/genhw/gen_regs.xml.h index 2bdd72b29bc..c51e4f78bc0 100644 --- a/src/gallium/drivers/ilo/genhw/gen_regs.xml.h +++ b/src/gallium/drivers/ilo/genhw/gen_regs.xml.h @@ -35,6 +35,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define GEN6_REG_MASK__MASK 0xffff0000 #define GEN6_REG_MASK__SHIFT 16 #define GEN6_REG__SIZE 0x400000 +#define GEN6_REG_NOPID 0x2094 + #define GEN7_REG_HS_INVOCATION_COUNT 0x2300 #define GEN7_REG_DS_INVOCATION_COUNT 0x2308 diff --git a/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h index d25542e8cc2..1abfef987b5 100644 --- a/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h +++ b/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h @@ -32,7 +32,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -enum gen_prim_type { +enum gen_3dprim_type { GEN6_3DPRIM_POINTLIST = 0x1, GEN6_3DPRIM_LINELIST = 0x2, GEN6_3DPRIM_LINESTRIP = 0x3, @@ -123,6 +123,87 @@ enum gen_depth_format { GEN6_ZFORMAT_D16_UNORM = 0x5, }; +enum gen_reorder_mode { + GEN7_REORDER_LEADING = 0x0, + GEN7_REORDER_TRAILING = 0x1, +}; + +enum gen_clip_mode { + GEN6_CLIPMODE_NORMAL = 0x0, + GEN6_CLIPMODE_REJECT_ALL = 0x3, + GEN6_CLIPMODE_ACCEPT_ALL = 0x4, +}; + +enum gen_front_winding { + GEN6_FRONTWINDING_CW = 0x0, + GEN6_FRONTWINDING_CCW = 0x1, +}; + +enum gen_fill_mode { + GEN6_FILLMODE_SOLID = 0x0, + GEN6_FILLMODE_WIREFRAME = 0x1, + GEN6_FILLMODE_POINT = 0x2, +}; + +enum gen_cull_mode { + GEN6_CULLMODE_BOTH = 0x0, + GEN6_CULLMODE_NONE = 0x1, + GEN6_CULLMODE_FRONT = 0x2, + GEN6_CULLMODE_BACK = 0x3, +}; + +enum gen_pixel_location { + GEN6_PIXLOC_CENTER = 0x0, + GEN6_PIXLOC_UL_CORNER = 0x1, +}; + +enum gen_sample_count { + GEN6_NUMSAMPLES_1 = 0x0, + GEN8_NUMSAMPLES_2 = 0x1, + GEN6_NUMSAMPLES_4 = 0x2, + GEN7_NUMSAMPLES_8 = 0x3, + GEN8_NUMSAMPLES_16 = 0x4, +}; + +enum gen_inputattr_select { + GEN6_INPUTATTR_NORMAL = 0x0, + GEN6_INPUTATTR_FACING = 0x1, + GEN6_INPUTATTR_W = 0x2, + GEN6_INPUTATTR_FACING_W = 0x3, +}; + +enum gen_zw_interp { + GEN6_ZW_INTERP_PIXEL = 0x0, + GEN6_ZW_INTERP_CENTROID = 0x2, + GEN6_ZW_INTERP_SAMPLE = 0x3, +}; + +enum gen_position_offset { + GEN6_POSOFFSET_NONE = 0x0, + GEN6_POSOFFSET_CENTROID = 0x2, + 
GEN6_POSOFFSET_SAMPLE = 0x3, +}; + +enum gen_edsc_mode { + GEN7_EDSC_NORMAL = 0x0, + GEN7_EDSC_PSEXEC = 0x1, + GEN7_EDSC_PREPS = 0x2, +}; + +enum gen_pscdepth_mode { + GEN7_PSCDEPTH_OFF = 0x0, + GEN7_PSCDEPTH_ON = 0x1, + GEN7_PSCDEPTH_ON_GE = 0x2, + GEN7_PSCDEPTH_ON_LE = 0x3, +}; + +enum gen_msrast_mode { + GEN6_MSRASTMODE_OFF_PIXEL = 0x0, + GEN6_MSRASTMODE_OFF_PATTERN = 0x1, + GEN6_MSRASTMODE_ON_PIXEL = 0x2, + GEN6_MSRASTMODE_ON_PATTERN = 0x3, +}; + #define GEN6_INTERP_NONPERSPECTIVE_SAMPLE (0x1 << 5) #define GEN6_INTERP_NONPERSPECTIVE_CENTROID (0x1 << 4) #define GEN6_INTERP_NONPERSPECTIVE_PIXEL (0x1 << 3) @@ -614,7 +695,7 @@ enum gen_depth_format { #define GEN6_GS_DW5_SO_STATISTICS (0x1 << 9) #define GEN6_GS_DW5_RENDER_ENABLE (0x1 << 8) -#define GEN6_GS_DW6_REORDER_ENABLE (0x1 << 30) +#define GEN6_GS_DW6_REORDER_LEADING_ENABLE (0x1 << 30) #define GEN6_GS_DW6_DISCARD_ADJACENCY (0x1 << 29) #define GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE (0x1 << 28) #define GEN6_GS_DW6_SVBI_POST_INC_ENABLE (0x1 << 27) @@ -666,11 +747,9 @@ enum gen_depth_format { #define GEN7_GS_DW5_INVOCATION_INCR__SHIFT 5 #define GEN7_GS_DW5_INCLUDE_PRIMITIVE_ID (0x1 << 4) #define GEN7_GS_DW5_HINT (0x1 << 3) -#define GEN7_GS_DW5_REORDER_ENABLE (0x1 << 2) -#define GEN75_GS_DW5_REORDER__MASK 0x00000004 -#define GEN75_GS_DW5_REORDER__SHIFT 2 -#define GEN75_GS_DW5_REORDER_LEADING (0x0 << 2) -#define GEN75_GS_DW5_REORDER_TRAILING (0x1 << 2) +#define GEN7_GS_DW5_REORDER_LEADING_ENABLE (0x1 << 2) +#define GEN75_GS_DW5_REORDER_MODE__MASK 0x00000004 +#define GEN75_GS_DW5_REORDER_MODE__SHIFT 2 #define GEN7_GS_DW5_DISCARD_ADJACENCY (0x1 << 1) #define GEN7_GS_DW5_GS_ENABLE (0x1 << 0) @@ -727,10 +806,8 @@ enum gen_depth_format { #define GEN8_GS_DW7_INVOCATION_INCR__SHIFT 5 #define GEN8_GS_DW7_INCLUDE_PRIMITIVE_ID (0x1 << 4) #define GEN8_GS_DW7_HINT (0x1 << 3) -#define GEN8_GS_DW7_REORDER__MASK 0x00000004 -#define GEN8_GS_DW7_REORDER__SHIFT 2 -#define GEN8_GS_DW7_REORDER_LEADING (0x0 << 2) -#define 
GEN8_GS_DW7_REORDER_TRAILING (0x1 << 2) +#define GEN8_GS_DW7_REORDER_MODE__MASK 0x00000004 +#define GEN8_GS_DW7_REORDER_MODE__SHIFT 2 #define GEN8_GS_DW7_DISCARD_ADJACENCY (0x1 << 1) #define GEN8_GS_DW7_GS_ENABLE (0x1 << 0) @@ -758,10 +835,8 @@ enum gen_depth_format { #define GEN7_SO_DW1_RENDER_DISABLE (0x1 << 30) #define GEN7_SO_DW1_RENDER_STREAM_SELECT__MASK 0x18000000 #define GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT 27 -#define GEN7_SO_DW1_REORDER__MASK 0x04000000 -#define GEN7_SO_DW1_REORDER__SHIFT 26 -#define GEN7_SO_DW1_REORDER_LEADING (0x0 << 26) -#define GEN7_SO_DW1_REORDER_TRAILING (0x1 << 26) +#define GEN7_SO_DW1_REORDER_MODE__MASK 0x04000000 +#define GEN7_SO_DW1_REORDER_MODE__SHIFT 26 #define GEN7_SO_DW1_STATISTICS (0x1 << 25) #define GEN7_SO_DW1_BUFFER_ENABLES__MASK 0x00000f00 #define GEN7_SO_DW1_BUFFER_ENABLES__SHIFT 8 @@ -862,21 +937,15 @@ enum gen_depth_format { #define GEN6_3DSTATE_CLIP__SIZE 4 -#define GEN7_CLIP_DW1_FRONTWINDING__MASK 0x00100000 -#define GEN7_CLIP_DW1_FRONTWINDING__SHIFT 20 -#define GEN7_CLIP_DW1_FRONTWINDING_CW (0x0 << 20) -#define GEN7_CLIP_DW1_FRONTWINDING_CCW (0x1 << 20) +#define GEN7_CLIP_DW1_FRONT_WINDING__MASK 0x00100000 +#define GEN7_CLIP_DW1_FRONT_WINDING__SHIFT 20 #define GEN7_CLIP_DW1_SUBPIXEL__MASK 0x00080000 #define GEN7_CLIP_DW1_SUBPIXEL__SHIFT 19 #define GEN7_CLIP_DW1_SUBPIXEL_8BITS (0x0 << 19) #define GEN7_CLIP_DW1_SUBPIXEL_4BITS (0x1 << 19) #define GEN7_CLIP_DW1_EARLY_CULL_ENABLE (0x1 << 18) -#define GEN7_CLIP_DW1_CULLMODE__MASK 0x00030000 -#define GEN7_CLIP_DW1_CULLMODE__SHIFT 16 -#define GEN7_CLIP_DW1_CULLMODE_BOTH (0x0 << 16) -#define GEN7_CLIP_DW1_CULLMODE_NONE (0x1 << 16) -#define GEN7_CLIP_DW1_CULLMODE_FRONT (0x2 << 16) -#define GEN7_CLIP_DW1_CULLMODE_BACK (0x3 << 16) +#define GEN7_CLIP_DW1_CULL_MODE__MASK 0x00030000 +#define GEN7_CLIP_DW1_CULL_MODE__SHIFT 16 #define GEN6_CLIP_DW1_STATISTICS (0x1 << 10) #define GEN6_CLIP_DW1_UCP_CULL_ENABLES__MASK 0x000000ff #define GEN6_CLIP_DW1_UCP_CULL_ENABLES__SHIFT 0 @@ 
-891,11 +960,8 @@ enum gen_depth_format { #define GEN6_CLIP_DW2_GB_TEST_ENABLE (0x1 << 26) #define GEN6_CLIP_DW2_UCP_CLIP_ENABLES__MASK 0x00ff0000 #define GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT 16 -#define GEN6_CLIP_DW2_CLIPMODE__MASK 0x0000e000 -#define GEN6_CLIP_DW2_CLIPMODE__SHIFT 13 -#define GEN6_CLIP_DW2_CLIPMODE_NORMAL (0x0 << 13) -#define GEN6_CLIP_DW2_CLIPMODE_REJECT_ALL (0x3 << 13) -#define GEN6_CLIP_DW2_CLIPMODE_ACCEPT_ALL (0x4 << 13) +#define GEN6_CLIP_DW2_CLIP_MODE__MASK 0x0000e000 +#define GEN6_CLIP_DW2_CLIP_MODE__SHIFT 13 #define GEN6_CLIP_DW2_PERSPECTIVE_DIVIDE_DISABLE (0x1 << 9) #define GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE (0x1 << 8) #define GEN6_CLIP_DW2_TRI_PROVOKE__MASK 0x00000030 @@ -911,7 +977,7 @@ enum gen_depth_format { #define GEN6_CLIP_DW3_MAX_POINT_WIDTH__MASK 0x0001ffc0 #define GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT 6 #define GEN6_CLIP_DW3_MAX_POINT_WIDTH__RADIX 3 -#define GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO (0x1 << 5) +#define GEN6_CLIP_DW3_FORCE_RTAINDEX_ZERO (0x1 << 5) #define GEN6_CLIP_DW3_MAX_VPINDEX__MASK 0x0000000f #define GEN6_CLIP_DW3_MAX_VPINDEX__SHIFT 0 @@ -927,29 +993,17 @@ enum gen_depth_format { #define GEN7_SF_DW1_DEPTH_OFFSET_SOLID (0x1 << 9) #define GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME (0x1 << 8) #define GEN7_SF_DW1_DEPTH_OFFSET_POINT (0x1 << 7) -#define GEN7_SF_DW1_FRONTFACE__MASK 0x00000060 -#define GEN7_SF_DW1_FRONTFACE__SHIFT 5 -#define GEN7_SF_DW1_FRONTFACE_SOLID (0x0 << 5) -#define GEN7_SF_DW1_FRONTFACE_WIREFRAME (0x1 << 5) -#define GEN7_SF_DW1_FRONTFACE_POINT (0x2 << 5) -#define GEN7_SF_DW1_BACKFACE__MASK 0x00000018 -#define GEN7_SF_DW1_BACKFACE__SHIFT 3 -#define GEN7_SF_DW1_BACKFACE_SOLID (0x0 << 3) -#define GEN7_SF_DW1_BACKFACE_WIREFRAME (0x1 << 3) -#define GEN7_SF_DW1_BACKFACE_POINT (0x2 << 3) -#define GEN7_SF_DW1_VIEWPORT_ENABLE (0x1 << 1) -#define GEN7_SF_DW1_FRONTWINDING__MASK 0x00000001 -#define GEN7_SF_DW1_FRONTWINDING__SHIFT 0 -#define GEN7_SF_DW1_FRONTWINDING_CW 0x0 -#define 
GEN7_SF_DW1_FRONTWINDING_CCW 0x1 +#define GEN7_SF_DW1_FILL_MODE_FRONT__MASK 0x00000060 +#define GEN7_SF_DW1_FILL_MODE_FRONT__SHIFT 5 +#define GEN7_SF_DW1_FILL_MODE_BACK__MASK 0x00000018 +#define GEN7_SF_DW1_FILL_MODE_BACK__SHIFT 3 +#define GEN7_SF_DW1_VIEWPORT_TRANSFORM (0x1 << 1) +#define GEN7_SF_DW1_FRONT_WINDING__MASK 0x00000001 +#define GEN7_SF_DW1_FRONT_WINDING__SHIFT 0 #define GEN7_SF_DW2_AA_LINE_ENABLE (0x1 << 31) -#define GEN7_SF_DW2_CULLMODE__MASK 0x60000000 -#define GEN7_SF_DW2_CULLMODE__SHIFT 29 -#define GEN7_SF_DW2_CULLMODE_BOTH (0x0 << 29) -#define GEN7_SF_DW2_CULLMODE_NONE (0x1 << 29) -#define GEN7_SF_DW2_CULLMODE_FRONT (0x2 << 29) -#define GEN7_SF_DW2_CULLMODE_BACK (0x3 << 29) +#define GEN7_SF_DW2_CULL_MODE__MASK 0x60000000 +#define GEN7_SF_DW2_CULL_MODE__SHIFT 29 #define GEN7_SF_DW2_LINE_WIDTH__MASK 0x0ffc0000 #define GEN7_SF_DW2_LINE_WIDTH__SHIFT 18 #define GEN7_SF_DW2_LINE_WIDTH__RADIX 7 @@ -963,10 +1017,6 @@ enum gen_depth_format { #define GEN7_SF_DW2_SCISSOR_ENABLE (0x1 << 11) #define GEN7_SF_DW2_MSRASTMODE__MASK 0x00000300 #define GEN7_SF_DW2_MSRASTMODE__SHIFT 8 -#define GEN7_SF_DW2_MSRASTMODE_OFF_PIXEL (0x0 << 8) -#define GEN7_SF_DW2_MSRASTMODE_OFF_PATTERN (0x1 << 8) -#define GEN7_SF_DW2_MSRASTMODE_ON_PIXEL (0x2 << 8) -#define GEN7_SF_DW2_MSRASTMODE_ON_PATTERN (0x3 << 8) #define GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE (0x1 << 31) #define GEN7_SF_DW3_TRI_PROVOKE__MASK 0x60000000 @@ -1021,14 +1071,10 @@ enum gen_depth_format { #define GEN8_SBE_SWIZ_CONST_0001_FLOAT (0x1 << 9) #define GEN8_SBE_SWIZ_CONST_1111_FLOAT (0x2 << 9) #define GEN8_SBE_SWIZ_CONST_PRIM_ID (0x3 << 9) -#define GEN8_SBE_SWIZ_INPUTATTR__MASK 0x000000c0 -#define GEN8_SBE_SWIZ_INPUTATTR__SHIFT 6 -#define GEN8_SBE_SWIZ_INPUTATTR_NORMAL (0x0 << 6) -#define GEN8_SBE_SWIZ_INPUTATTR_FACING (0x1 << 6) -#define GEN8_SBE_SWIZ_INPUTATTR_W (0x2 << 6) -#define GEN8_SBE_SWIZ_INPUTATTR_FACING_W (0x3 << 6) -#define GEN8_SBE_SWIZ_URB_ENTRY_OFFSET__MASK 0x0000001f -#define 
GEN8_SBE_SWIZ_URB_ENTRY_OFFSET__SHIFT 0 +#define GEN8_SBE_SWIZ_SWIZZLE_SELECT__MASK 0x000000c0 +#define GEN8_SBE_SWIZ_SWIZZLE_SELECT__SHIFT 6 +#define GEN8_SBE_SWIZ_SRC_ATTR__MASK 0x0000001f +#define GEN8_SBE_SWIZ_SRC_ATTR__SHIFT 0 #define GEN6_3DSTATE_SF__SIZE 20 @@ -1080,31 +1126,19 @@ enum gen_depth_format { #define GEN9_RASTER_DW1_Z_TEST_FAR_ENABLE (0x1 << 26) -#define GEN8_RASTER_DW1_FRONTWINDING__MASK 0x00200000 -#define GEN8_RASTER_DW1_FRONTWINDING__SHIFT 21 -#define GEN8_RASTER_DW1_FRONTWINDING_CW (0x0 << 21) -#define GEN8_RASTER_DW1_FRONTWINDING_CCW (0x1 << 21) -#define GEN8_RASTER_DW1_CULLMODE__MASK 0x00030000 -#define GEN8_RASTER_DW1_CULLMODE__SHIFT 16 -#define GEN8_RASTER_DW1_CULLMODE_BOTH (0x0 << 16) -#define GEN8_RASTER_DW1_CULLMODE_NONE (0x1 << 16) -#define GEN8_RASTER_DW1_CULLMODE_FRONT (0x2 << 16) -#define GEN8_RASTER_DW1_CULLMODE_BACK (0x3 << 16) +#define GEN8_RASTER_DW1_FRONT_WINDING__MASK 0x00200000 +#define GEN8_RASTER_DW1_FRONT_WINDING__SHIFT 21 +#define GEN8_RASTER_DW1_CULL_MODE__MASK 0x00030000 +#define GEN8_RASTER_DW1_CULL_MODE__SHIFT 16 #define GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE (0x1 << 13) #define GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE (0x1 << 12) #define GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID (0x1 << 9) #define GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME (0x1 << 8) #define GEN8_RASTER_DW1_DEPTH_OFFSET_POINT (0x1 << 7) -#define GEN8_RASTER_DW1_FRONTFACE__MASK 0x00000060 -#define GEN8_RASTER_DW1_FRONTFACE__SHIFT 5 -#define GEN8_RASTER_DW1_FRONTFACE_SOLID (0x0 << 5) -#define GEN8_RASTER_DW1_FRONTFACE_WIREFRAME (0x1 << 5) -#define GEN8_RASTER_DW1_FRONTFACE_POINT (0x2 << 5) -#define GEN8_RASTER_DW1_BACKFACE__MASK 0x00000018 -#define GEN8_RASTER_DW1_BACKFACE__SHIFT 3 -#define GEN8_RASTER_DW1_BACKFACE_SOLID (0x0 << 3) -#define GEN8_RASTER_DW1_BACKFACE_WIREFRAME (0x1 << 3) -#define GEN8_RASTER_DW1_BACKFACE_POINT (0x2 << 3) +#define GEN8_RASTER_DW1_FILL_MODE_FRONT__MASK 0x00000060 +#define GEN8_RASTER_DW1_FILL_MODE_FRONT__SHIFT 5 +#define 
GEN8_RASTER_DW1_FILL_MODE_BACK__MASK 0x00000018 +#define GEN8_RASTER_DW1_FILL_MODE_BACK__SHIFT 3 #define GEN8_RASTER_DW1_AA_LINE_ENABLE (0x1 << 2) #define GEN8_RASTER_DW1_SCISSOR_ENABLE (0x1 << 1) #define GEN8_RASTER_DW1_Z_TEST_ENABLE (0x1 << 0) @@ -1164,14 +1198,8 @@ enum gen_depth_format { #define GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT 20 #define GEN6_WM_DW6_PS_POSOFFSET__MASK 0x000c0000 #define GEN6_WM_DW6_PS_POSOFFSET__SHIFT 18 -#define GEN6_WM_DW6_PS_POSOFFSET_NONE (0x0 << 18) -#define GEN6_WM_DW6_PS_POSOFFSET_CENTROID (0x2 << 18) -#define GEN6_WM_DW6_PS_POSOFFSET_SAMPLE (0x3 << 18) #define GEN6_WM_DW6_ZW_INTERP__MASK 0x00030000 #define GEN6_WM_DW6_ZW_INTERP__SHIFT 16 -#define GEN6_WM_DW6_ZW_INTERP_PIXEL (0x0 << 16) -#define GEN6_WM_DW6_ZW_INTERP_CENTROID (0x2 << 16) -#define GEN6_WM_DW6_ZW_INTERP_SAMPLE (0x3 << 16) #define GEN6_WM_DW6_BARYCENTRIC_INTERP__MASK 0x0000fc00 #define GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT 10 #define GEN6_WM_DW6_POINT_RASTRULE__MASK 0x00000200 @@ -1180,10 +1208,6 @@ enum gen_depth_format { #define GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT (0x1 << 9) #define GEN6_WM_DW6_MSRASTMODE__MASK 0x00000006 #define GEN6_WM_DW6_MSRASTMODE__SHIFT 1 -#define GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL (0x0 << 1) -#define GEN6_WM_DW6_MSRASTMODE_OFF_PATTERN (0x1 << 1) -#define GEN6_WM_DW6_MSRASTMODE_ON_PIXEL (0x2 << 1) -#define GEN6_WM_DW6_MSRASTMODE_ON_PATTERN (0x3 << 1) #define GEN6_WM_DW6_MSDISPMODE__MASK 0x00000001 #define GEN6_WM_DW6_MSDISPMODE__SHIFT 0 #define GEN6_WM_DW6_MSDISPMODE_PERSAMPLE 0x0 @@ -1207,22 +1231,12 @@ enum gen_depth_format { #define GEN7_WM_DW1_PS_KILL_PIXEL (0x1 << 25) #define GEN7_WM_DW1_PSCDEPTH__MASK 0x01800000 #define GEN7_WM_DW1_PSCDEPTH__SHIFT 23 -#define GEN7_WM_DW1_PSCDEPTH_OFF (0x0 << 23) -#define GEN7_WM_DW1_PSCDEPTH_ON (0x1 << 23) -#define GEN7_WM_DW1_PSCDEPTH_ON_GE (0x2 << 23) -#define GEN7_WM_DW1_PSCDEPTH_ON_LE (0x3 << 23) #define GEN7_WM_DW1_EDSC__MASK 0x00600000 #define GEN7_WM_DW1_EDSC__SHIFT 21 -#define GEN7_WM_DW1_EDSC_NORMAL 
(0x0 << 21) -#define GEN7_WM_DW1_EDSC_PSEXEC (0x1 << 21) -#define GEN7_WM_DW1_EDSC_PREPS (0x2 << 21) #define GEN7_WM_DW1_PS_USE_DEPTH (0x1 << 20) #define GEN7_WM_DW1_PS_USE_W (0x1 << 19) #define GEN7_WM_DW1_ZW_INTERP__MASK 0x00060000 #define GEN7_WM_DW1_ZW_INTERP__SHIFT 17 -#define GEN7_WM_DW1_ZW_INTERP_PIXEL (0x0 << 17) -#define GEN7_WM_DW1_ZW_INTERP_CENTROID (0x2 << 17) -#define GEN7_WM_DW1_ZW_INTERP_SAMPLE (0x3 << 17) #define GEN7_WM_DW1_BARYCENTRIC_INTERP__MASK 0x0001f800 #define GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT 11 #define GEN7_WM_DW1_PS_USE_COVERAGE_MASK (0x1 << 10) @@ -1247,10 +1261,6 @@ enum gen_depth_format { #define GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT (0x1 << 2) #define GEN7_WM_DW1_MSRASTMODE__MASK 0x00000003 #define GEN7_WM_DW1_MSRASTMODE__SHIFT 0 -#define GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL 0x0 -#define GEN7_WM_DW1_MSRASTMODE_OFF_PATTERN 0x1 -#define GEN7_WM_DW1_MSRASTMODE_ON_PIXEL 0x2 -#define GEN7_WM_DW1_MSRASTMODE_ON_PATTERN 0x3 #define GEN7_WM_DW2_MSDISPMODE__MASK 0x80000000 #define GEN7_WM_DW2_MSDISPMODE__SHIFT 31 @@ -1265,12 +1275,12 @@ enum gen_depth_format { #define GEN8_3DSTATE_WM_DEPTH_STENCIL__SIZE 4 -#define GEN8_ZS_DW1_STENCIL0_FAIL_OP__MASK 0xe0000000 -#define GEN8_ZS_DW1_STENCIL0_FAIL_OP__SHIFT 29 -#define GEN8_ZS_DW1_STENCIL0_ZFAIL_OP__MASK 0x1c000000 -#define GEN8_ZS_DW1_STENCIL0_ZFAIL_OP__SHIFT 26 -#define GEN8_ZS_DW1_STENCIL0_ZPASS_OP__MASK 0x03800000 -#define GEN8_ZS_DW1_STENCIL0_ZPASS_OP__SHIFT 23 +#define GEN8_ZS_DW1_STENCIL_FAIL_OP__MASK 0xe0000000 +#define GEN8_ZS_DW1_STENCIL_FAIL_OP__SHIFT 29 +#define GEN8_ZS_DW1_STENCIL_ZFAIL_OP__MASK 0x1c000000 +#define GEN8_ZS_DW1_STENCIL_ZFAIL_OP__SHIFT 26 +#define GEN8_ZS_DW1_STENCIL_ZPASS_OP__MASK 0x03800000 +#define GEN8_ZS_DW1_STENCIL_ZPASS_OP__SHIFT 23 #define GEN8_ZS_DW1_STENCIL1_FUNC__MASK 0x00700000 #define GEN8_ZS_DW1_STENCIL1_FUNC__SHIFT 20 #define GEN8_ZS_DW1_STENCIL1_FAIL_OP__MASK 0x000e0000 @@ -1279,8 +1289,8 @@ enum gen_depth_format { #define 
GEN8_ZS_DW1_STENCIL1_ZFAIL_OP__SHIFT 14 #define GEN8_ZS_DW1_STENCIL1_ZPASS_OP__MASK 0x00003800 #define GEN8_ZS_DW1_STENCIL1_ZPASS_OP__SHIFT 11 -#define GEN8_ZS_DW1_STENCIL0_FUNC__MASK 0x00000700 -#define GEN8_ZS_DW1_STENCIL0_FUNC__SHIFT 8 +#define GEN8_ZS_DW1_STENCIL_FUNC__MASK 0x00000700 +#define GEN8_ZS_DW1_STENCIL_FUNC__SHIFT 8 #define GEN8_ZS_DW1_DEPTH_FUNC__MASK 0x000000e0 #define GEN8_ZS_DW1_DEPTH_FUNC__SHIFT 5 #define GEN8_ZS_DW1_STENCIL1_ENABLE (0x1 << 4) @@ -1289,17 +1299,17 @@ enum gen_depth_format { #define GEN8_ZS_DW1_DEPTH_TEST_ENABLE (0x1 << 1) #define GEN8_ZS_DW1_DEPTH_WRITE_ENABLE (0x1 << 0) -#define GEN8_ZS_DW2_STENCIL0_VALUEMASK__MASK 0xff000000 -#define GEN8_ZS_DW2_STENCIL0_VALUEMASK__SHIFT 24 -#define GEN8_ZS_DW2_STENCIL0_WRITEMASK__MASK 0x00ff0000 -#define GEN8_ZS_DW2_STENCIL0_WRITEMASK__SHIFT 16 -#define GEN8_ZS_DW2_STENCIL1_VALUEMASK__MASK 0x0000ff00 -#define GEN8_ZS_DW2_STENCIL1_VALUEMASK__SHIFT 8 -#define GEN8_ZS_DW2_STENCIL1_WRITEMASK__MASK 0x000000ff -#define GEN8_ZS_DW2_STENCIL1_WRITEMASK__SHIFT 0 +#define GEN8_ZS_DW2_STENCIL_TEST_MASK__MASK 0xff000000 +#define GEN8_ZS_DW2_STENCIL_TEST_MASK__SHIFT 24 +#define GEN8_ZS_DW2_STENCIL_WRITE_MASK__MASK 0x00ff0000 +#define GEN8_ZS_DW2_STENCIL_WRITE_MASK__SHIFT 16 +#define GEN8_ZS_DW2_STENCIL1_TEST_MASK__MASK 0x0000ff00 +#define GEN8_ZS_DW2_STENCIL1_TEST_MASK__SHIFT 8 +#define GEN8_ZS_DW2_STENCIL1_WRITE_MASK__MASK 0x000000ff +#define GEN8_ZS_DW2_STENCIL1_WRITE_MASK__SHIFT 0 -#define GEN9_ZS_DW3_STENCIL0_REF__MASK 0x0000ff00 -#define GEN9_ZS_DW3_STENCIL0_REF__SHIFT 8 +#define GEN9_ZS_DW3_STENCIL_REF__MASK 0x0000ff00 +#define GEN9_ZS_DW3_STENCIL_REF__SHIFT 8 #define GEN9_ZS_DW3_STENCIL1_REF__MASK 0x000000ff #define GEN9_ZS_DW3_STENCIL1_REF__SHIFT 0 @@ -1314,13 +1324,8 @@ enum gen_depth_format { #define GEN8_WM_HZ_DW1_FULL_SURFACE_DEPTH_CLEAR (0x1 << 25) #define GEN8_WM_HZ_DW1_STENCIL_CLEAR_VALUE__MASK 0x00ff0000 #define GEN8_WM_HZ_DW1_STENCIL_CLEAR_VALUE__SHIFT 16 -#define 
GEN8_WM_HZ_DW1_NUMSAMPLES__MASK 0x0000e000 -#define GEN8_WM_HZ_DW1_NUMSAMPLES__SHIFT 13 -#define GEN8_WM_HZ_DW1_NUMSAMPLES_1 (0x0 << 13) -#define GEN8_WM_HZ_DW1_NUMSAMPLES_2 (0x1 << 13) -#define GEN8_WM_HZ_DW1_NUMSAMPLES_4 (0x2 << 13) -#define GEN8_WM_HZ_DW1_NUMSAMPLES_8 (0x3 << 13) -#define GEN8_WM_HZ_DW1_NUMSAMPLES_16 (0x4 << 13) +#define GEN8_WM_HZ_DW1_NUM_SAMPLES__MASK 0x0000e000 +#define GEN8_WM_HZ_DW1_NUM_SAMPLES__SHIFT 13 #define GEN8_WM_HZ_DW2_RECT_MIN_Y__MASK 0xffff0000 #define GEN8_WM_HZ_DW2_RECT_MIN_Y__SHIFT 16 @@ -1359,9 +1364,6 @@ enum gen_depth_format { #define GEN75_PS_DW4_ACCESS_UAV (0x1 << 5) #define GEN7_PS_DW4_POSOFFSET__MASK 0x00000018 #define GEN7_PS_DW4_POSOFFSET__SHIFT 3 -#define GEN7_PS_DW4_POSOFFSET_NONE (0x0 << 3) -#define GEN7_PS_DW4_POSOFFSET_CENTROID (0x2 << 3) -#define GEN7_PS_DW4_POSOFFSET_SAMPLE (0x3 << 3) #define GEN7_PS_DW4_DISPATCH_MODE__MASK 0x00000007 #define GEN7_PS_DW4_DISPATCH_MODE__SHIFT 0 @@ -1397,9 +1399,6 @@ enum gen_depth_format { #define GEN8_PS_DW6_RT_RESOLVE (0x1 << 6) #define GEN8_PS_DW6_POSOFFSET__MASK 0x00000018 #define GEN8_PS_DW6_POSOFFSET__SHIFT 3 -#define GEN8_PS_DW6_POSOFFSET_NONE (0x0 << 3) -#define GEN8_PS_DW6_POSOFFSET_CENTROID (0x2 << 3) -#define GEN8_PS_DW6_POSOFFSET_SAMPLE (0x3 << 3) #define GEN8_PS_DW6_DISPATCH_MODE__MASK 0x00000007 #define GEN8_PS_DW6_DISPATCH_MODE__SHIFT 0 @@ -1423,16 +1422,12 @@ enum gen_depth_format { #define GEN8_3DSTATE_PS_EXTRA__SIZE 2 -#define GEN8_PSX_DW1_DISPATCH_ENABLE (0x1 << 31) +#define GEN8_PSX_DW1_VALID (0x1 << 31) #define GEN8_PSX_DW1_UAV_ONLY (0x1 << 30) #define GEN8_PSX_DW1_COMPUTE_OMASK (0x1 << 29) #define GEN8_PSX_DW1_KILL_PIXEL (0x1 << 28) #define GEN8_PSX_DW1_PSCDEPTH__MASK 0x0c000000 #define GEN8_PSX_DW1_PSCDEPTH__SHIFT 26 -#define GEN8_PSX_DW1_PSCDEPTH_OFF (0x0 << 26) -#define GEN8_PSX_DW1_PSCDEPTH_ON (0x1 << 26) -#define GEN8_PSX_DW1_PSCDEPTH_ON_GE (0x2 << 26) -#define GEN8_PSX_DW1_PSCDEPTH_ON_LE (0x3 << 26) #define GEN8_PSX_DW1_FORCE_COMPUTE_DEPTH (0x1 << 25) 
#define GEN8_PSX_DW1_USE_DEPTH (0x1 << 24) #define GEN8_PSX_DW1_USE_W (0x1 << 23) @@ -1696,17 +1691,10 @@ enum gen_depth_format { #define GEN75_MULTISAMPLE_DW1_DX9_MULTISAMPLE_ENABLE (0x1 << 5) -#define GEN6_MULTISAMPLE_DW1_PIXLOC__MASK 0x00000010 -#define GEN6_MULTISAMPLE_DW1_PIXLOC__SHIFT 4 -#define GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER (0x0 << 4) -#define GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER (0x1 << 4) -#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES__MASK 0x0000000e -#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES__SHIFT 1 -#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1 (0x0 << 1) -#define GEN8_MULTISAMPLE_DW1_NUMSAMPLES_2 (0x1 << 1) -#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4 (0x2 << 1) -#define GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8 (0x3 << 1) -#define GEN8_MULTISAMPLE_DW1_NUMSAMPLES_16 (0x4 << 1) +#define GEN6_MULTISAMPLE_DW1_PIXEL_LOCATION__MASK 0x00000010 +#define GEN6_MULTISAMPLE_DW1_PIXEL_LOCATION__SHIFT 4 +#define GEN6_MULTISAMPLE_DW1_NUM_SAMPLES__MASK 0x0000000e +#define GEN6_MULTISAMPLE_DW1_NUM_SAMPLES__SHIFT 1 diff --git a/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h index 6d815beecb3..b65b704adc6 100644 --- a/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h +++ b/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h @@ -84,7 +84,7 @@ enum gen_blend_function { GEN6_BLENDFUNCTION_MAX = 0x4, }; -enum gen_logicop_function { +enum gen_logic_op { GEN6_LOGICOP_CLEAR = 0x0, GEN6_LOGICOP_NOR = 0x1, GEN6_LOGICOP_AND_INVERTED = 0x2, @@ -103,20 +103,31 @@ enum gen_logicop_function { GEN6_LOGICOP_SET = 0xf, }; -enum gen_sampler_mip_filter { +enum gen_mip_filter { GEN6_MIPFILTER_NONE = 0x0, GEN6_MIPFILTER_NEAREST = 0x1, GEN6_MIPFILTER_LINEAR = 0x3, }; -enum gen_sampler_map_filter { +enum gen_map_filter { GEN6_MAPFILTER_NEAREST = 0x0, GEN6_MAPFILTER_LINEAR = 0x1, GEN6_MAPFILTER_ANISOTROPIC = 0x2, GEN6_MAPFILTER_MONO = 0x6, }; -enum gen_sampler_aniso_ratio { +enum gen_prefilter_op { + GEN6_PREFILTEROP_ALWAYS = 0x0, + 
GEN6_PREFILTEROP_NEVER = 0x1, + GEN6_PREFILTEROP_LESS = 0x2, + GEN6_PREFILTEROP_EQUAL = 0x3, + GEN6_PREFILTEROP_LEQUAL = 0x4, + GEN6_PREFILTEROP_GREATER = 0x5, + GEN6_PREFILTEROP_NOTEQUAL = 0x6, + GEN6_PREFILTEROP_GEQUAL = 0x7, +}; + +enum gen_aniso_ratio { GEN6_ANISORATIO_2 = 0x0, GEN6_ANISORATIO_4 = 0x1, GEN6_ANISORATIO_6 = 0x2, @@ -127,7 +138,7 @@ enum gen_sampler_aniso_ratio { GEN6_ANISORATIO_16 = 0x7, }; -enum gen_sampler_texcoord_mode { +enum gen_texcoord_mode { GEN6_TEXCOORDMODE_WRAP = 0x0, GEN6_TEXCOORDMODE_MIRROR = 0x1, GEN6_TEXCOORDMODE_CLAMP = 0x2, @@ -137,15 +148,15 @@ enum gen_sampler_texcoord_mode { GEN8_TEXCOORDMODE_HALF_BORDER = 0x6, }; -enum gen_sampler_key_filter { +enum gen_key_filter { GEN6_KEYFILTER_KILL_ON_ANY_MATCH = 0x0, GEN6_KEYFILTER_REPLACE_BLACK = 0x1, }; #define GEN6_COLOR_CALC_STATE__SIZE 6 -#define GEN6_CC_DW0_STENCIL0_REF__MASK 0xff000000 -#define GEN6_CC_DW0_STENCIL0_REF__SHIFT 24 +#define GEN6_CC_DW0_STENCIL_REF__MASK 0xff000000 +#define GEN6_CC_DW0_STENCIL_REF__SHIFT 24 #define GEN6_CC_DW0_STENCIL1_REF__MASK 0x00ff0000 #define GEN6_CC_DW0_STENCIL1_REF__SHIFT 16 #define GEN6_CC_DW0_ROUND_DISABLE_DISABLE (0x1 << 15) @@ -162,14 +173,14 @@ enum gen_sampler_key_filter { #define GEN6_DEPTH_STENCIL_STATE__SIZE 3 #define GEN6_ZS_DW0_STENCIL_TEST_ENABLE (0x1 << 31) -#define GEN6_ZS_DW0_STENCIL0_FUNC__MASK 0x70000000 -#define GEN6_ZS_DW0_STENCIL0_FUNC__SHIFT 28 -#define GEN6_ZS_DW0_STENCIL0_FAIL_OP__MASK 0x0e000000 -#define GEN6_ZS_DW0_STENCIL0_FAIL_OP__SHIFT 25 -#define GEN6_ZS_DW0_STENCIL0_ZFAIL_OP__MASK 0x01c00000 -#define GEN6_ZS_DW0_STENCIL0_ZFAIL_OP__SHIFT 22 -#define GEN6_ZS_DW0_STENCIL0_ZPASS_OP__MASK 0x00380000 -#define GEN6_ZS_DW0_STENCIL0_ZPASS_OP__SHIFT 19 +#define GEN6_ZS_DW0_STENCIL_FUNC__MASK 0x70000000 +#define GEN6_ZS_DW0_STENCIL_FUNC__SHIFT 28 +#define GEN6_ZS_DW0_STENCIL_FAIL_OP__MASK 0x0e000000 +#define GEN6_ZS_DW0_STENCIL_FAIL_OP__SHIFT 25 +#define GEN6_ZS_DW0_STENCIL_ZFAIL_OP__MASK 0x01c00000 +#define 
GEN6_ZS_DW0_STENCIL_ZFAIL_OP__SHIFT 22 +#define GEN6_ZS_DW0_STENCIL_ZPASS_OP__MASK 0x00380000 +#define GEN6_ZS_DW0_STENCIL_ZPASS_OP__SHIFT 19 #define GEN6_ZS_DW0_STENCIL_WRITE_ENABLE (0x1 << 18) #define GEN6_ZS_DW0_STENCIL1_ENABLE (0x1 << 15) #define GEN6_ZS_DW0_STENCIL1_FUNC__MASK 0x00007000 @@ -181,14 +192,14 @@ enum gen_sampler_key_filter { #define GEN6_ZS_DW0_STENCIL1_ZPASS_OP__MASK 0x00000038 #define GEN6_ZS_DW0_STENCIL1_ZPASS_OP__SHIFT 3 -#define GEN6_ZS_DW1_STENCIL0_VALUEMASK__MASK 0xff000000 -#define GEN6_ZS_DW1_STENCIL0_VALUEMASK__SHIFT 24 -#define GEN6_ZS_DW1_STENCIL0_WRITEMASK__MASK 0x00ff0000 -#define GEN6_ZS_DW1_STENCIL0_WRITEMASK__SHIFT 16 -#define GEN6_ZS_DW1_STENCIL1_VALUEMASK__MASK 0x0000ff00 -#define GEN6_ZS_DW1_STENCIL1_VALUEMASK__SHIFT 8 -#define GEN6_ZS_DW1_STENCIL1_WRITEMASK__MASK 0x000000ff -#define GEN6_ZS_DW1_STENCIL1_WRITEMASK__SHIFT 0 +#define GEN6_ZS_DW1_STENCIL_TEST_MASK__MASK 0xff000000 +#define GEN6_ZS_DW1_STENCIL_TEST_MASK__SHIFT 24 +#define GEN6_ZS_DW1_STENCIL_WRITE_MASK__MASK 0x00ff0000 +#define GEN6_ZS_DW1_STENCIL_WRITE_MASK__SHIFT 16 +#define GEN6_ZS_DW1_STENCIL1_TEST_MASK__MASK 0x0000ff00 +#define GEN6_ZS_DW1_STENCIL1_TEST_MASK__SHIFT 8 +#define GEN6_ZS_DW1_STENCIL1_WRITE_MASK__MASK 0x000000ff +#define GEN6_ZS_DW1_STENCIL1_WRITE_MASK__SHIFT 0 #define GEN6_ZS_DW2_DEPTH_TEST_ENABLE (0x1 << 31) #define GEN6_ZS_DW2_DEPTH_FUNC__MASK 0x38000000 @@ -216,10 +227,12 @@ enum gen_sampler_key_filter { #define GEN6_RT_DW1_ALPHA_TO_COVERAGE (0x1 << 31) #define GEN6_RT_DW1_ALPHA_TO_ONE (0x1 << 30) #define GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER (0x1 << 29) -#define GEN6_RT_DW1_WRITE_DISABLE_A (0x1 << 27) -#define GEN6_RT_DW1_WRITE_DISABLE_R (0x1 << 26) -#define GEN6_RT_DW1_WRITE_DISABLE_G (0x1 << 25) -#define GEN6_RT_DW1_WRITE_DISABLE_B (0x1 << 24) +#define GEN6_RT_DW1_WRITE_DISABLES__MASK 0x0f000000 +#define GEN6_RT_DW1_WRITE_DISABLES__SHIFT 24 +#define GEN6_RT_DW1_WRITE_DISABLES_A (0x1 << 27) +#define GEN6_RT_DW1_WRITE_DISABLES_R (0x1 << 26) 
+#define GEN6_RT_DW1_WRITE_DISABLES_G (0x1 << 25) +#define GEN6_RT_DW1_WRITE_DISABLES_B (0x1 << 24) #define GEN6_RT_DW1_LOGICOP_ENABLE (0x1 << 22) #define GEN6_RT_DW1_LOGICOP_FUNC__MASK 0x003c0000 #define GEN6_RT_DW1_LOGICOP_FUNC__SHIFT 18 @@ -267,10 +280,12 @@ enum gen_sampler_key_filter { #define GEN8_RT_DW0_DST_ALPHA_FACTOR__SHIFT 8 #define GEN8_RT_DW0_ALPHA_FUNC__MASK 0x000000e0 #define GEN8_RT_DW0_ALPHA_FUNC__SHIFT 5 -#define GEN8_RT_DW0_WRITE_DISABLE_A (0x1 << 3) -#define GEN8_RT_DW0_WRITE_DISABLE_R (0x1 << 2) -#define GEN8_RT_DW0_WRITE_DISABLE_G (0x1 << 1) -#define GEN8_RT_DW0_WRITE_DISABLE_B (0x1 << 0) +#define GEN8_RT_DW0_WRITE_DISABLES__MASK 0x0000000f +#define GEN8_RT_DW0_WRITE_DISABLES__SHIFT 0 +#define GEN8_RT_DW0_WRITE_DISABLES_A (0x1 << 3) +#define GEN8_RT_DW0_WRITE_DISABLES_R (0x1 << 2) +#define GEN8_RT_DW0_WRITE_DISABLES_G (0x1 << 1) +#define GEN8_RT_DW0_WRITE_DISABLES_B (0x1 << 0) #define GEN8_RT_DW1_LOGICOP_ENABLE (0x1 << 31) #define GEN8_RT_DW1_LOGICOP_FUNC__MASK 0x78000000 @@ -419,6 +434,7 @@ enum gen_sampler_key_filter { #define GEN8_SAMPLER_DW0_LOD_PRECLAMP_ENABLE__SHIFT 27 #define GEN6_SAMPLER_DW0_BASE_LOD__MASK 0x07c00000 #define GEN6_SAMPLER_DW0_BASE_LOD__SHIFT 22 +#define GEN6_SAMPLER_DW0_BASE_LOD__RADIX 1 #define GEN6_SAMPLER_DW0_MIP_FILTER__MASK 0x00300000 #define GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT 20 #define GEN6_SAMPLER_DW0_MAG_FILTER__MASK 0x000e0000 diff --git a/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h index 7c2349f2447..b5d09f64429 100644 --- a/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h +++ b/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h @@ -299,7 +299,10 @@ enum gen_surface_scs { #define GEN6_SURFACE_DW0_MIPLAYOUT__SHIFT 10 #define GEN6_SURFACE_DW0_MIPLAYOUT_BELOW (0x0 << 10) #define GEN6_SURFACE_DW0_MIPLAYOUT_RIGHT (0x1 << 10) -#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE (0x1 << 9) +#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE__MASK 
0x00000200 +#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE__SHIFT 9 +#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_REPLICATE (0x0 << 9) +#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_AVERAGE (0x1 << 9) #define GEN6_SURFACE_DW0_RENDER_CACHE_RW (0x1 << 8) #define GEN6_SURFACE_DW0_MEDIA_BOUNDARY_PIXEL_MODE__MASK 0x000000c0 #define GEN6_SURFACE_DW0_MEDIA_BOUNDARY_PIXEL_MODE__SHIFT 6 @@ -485,6 +488,8 @@ enum gen_surface_scs { #define GEN7_SURFACE_DW7_CC_B__SHIFT 29 #define GEN7_SURFACE_DW7_CC_A__MASK 0x10000000 #define GEN7_SURFACE_DW7_CC_A__SHIFT 28 +#define GEN75_SURFACE_DW7_SCS__MASK 0x0fff0000 +#define GEN75_SURFACE_DW7_SCS__SHIFT 16 #define GEN75_SURFACE_DW7_SCS_R__MASK 0x0e000000 #define GEN75_SURFACE_DW7_SCS_R__SHIFT 25 #define GEN75_SURFACE_DW7_SCS_G__MASK 0x01c00000 diff --git a/src/gallium/drivers/ilo/genhw/genhw.h b/src/gallium/drivers/ilo/genhw/genhw.h index 9e05bf5beca..3a777a18c2a 100644 --- a/src/gallium/drivers/ilo/genhw/genhw.h +++ b/src/gallium/drivers/ilo/genhw/genhw.h @@ -1,6 +1,4 @@ /* - * Mesa 3-D graphics library - * * Copyright (C) 2014 LunarG, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,8 +23,9 @@ #ifndef GENHW_H #define GENHW_H -#include "pipe/p_compiler.h" -#include "util/u_debug.h" +#include +#include +#include #include "gen_regs.xml.h" #include "gen_mi.xml.h" diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 799db2cbfcb..af467064fe4 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -930,7 +930,8 @@ ilo_shader_select_kernel_routing(struct ilo_shader_state *shader, src_slot + 1 < routing->source_len && src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR && src_indices[src_slot + 1] == index) { - routing->swizzles[dst_slot] |= GEN8_SBE_SWIZ_INPUTATTR_FACING; + routing->swizzles[dst_slot] |= GEN6_INPUTATTR_FACING << + GEN8_SBE_SWIZ_SWIZZLE_SELECT__SHIFT; src_slot++; } From b91250a56b0af51b82bf4152a4f98e74fab22ed4 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 16 May 2015 08:27:24 +0800 Subject: [PATCH 608/834] ilo: add ilo_state_zs We want to replace ilo_zs_surface with ilo_state_zs. One noteworthy difference is that ilo_state_zs always aligns level 0 to 8x4 when HiZ is enabled. HiZ will not be enabled for 1D surfaces as a result. 
--- src/gallium/drivers/ilo/Makefile.sources | 2 + src/gallium/drivers/ilo/core/ilo_state_zs.c | 727 ++++++++++++++++++++ src/gallium/drivers/ilo/core/ilo_state_zs.h | 93 +++ 3 files changed, 822 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_zs.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_zs.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 91a6f65f2e9..943e29113ae 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -23,6 +23,8 @@ C_SOURCES := \ core/ilo_state_3d.h \ core/ilo_state_3d_bottom.c \ core/ilo_state_3d_top.c \ + core/ilo_state_zs.c \ + core/ilo_state_zs.h \ core/intel_winsys.h \ ilo_blit.c \ ilo_blit.h \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.c b/src/gallium/drivers/ilo/core/ilo_state_zs.c new file mode 100644 index 00000000000..901fedb5599 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_zs.c @@ -0,0 +1,727 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "intel_winsys.h" + +#include "ilo_debug.h" +#include "ilo_image.h" +#include "ilo_state_zs.h" + +static bool +zs_set_gen6_null_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs, + const struct ilo_dev *dev) +{ + const enum gen_depth_format format = GEN6_ZFORMAT_D32_FLOAT; + uint32_t dw1; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + dw1 = GEN6_SURFTYPE_NULL << GEN7_DEPTH_DW1_TYPE__SHIFT | + format << GEN7_DEPTH_DW1_FORMAT__SHIFT; + } else { + dw1 = GEN6_SURFTYPE_NULL << GEN6_DEPTH_DW1_TYPE__SHIFT | + GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT | + format << GEN6_DEPTH_DW1_FORMAT__SHIFT; + } + + STATIC_ASSERT(ARRAY_SIZE(zs->depth) >= 5); + zs->depth[0] = dw1; + zs->depth[1] = 0; + zs->depth[2] = 0; + zs->depth[3] = 0; + zs->depth[4] = 0; + + zs->depth_format = format; + + return true; +} + +static enum gen_surface_type +get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + switch (img->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return GEN6_SURFTYPE_1D; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE_ARRAY: + return GEN6_SURFTYPE_2D; + case PIPE_TEXTURE_3D: + return GEN6_SURFTYPE_3D; + default: + assert(!"unknown texture target"); + return GEN6_SURFTYPE_NULL; + } +} + +static enum gen_depth_format +get_gen6_depth_format(const struct ilo_dev *dev, const struct ilo_image *img) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + switch (img->format) { + case PIPE_FORMAT_Z32_FLOAT: + return GEN6_ZFORMAT_D32_FLOAT; + case 
PIPE_FORMAT_Z24X8_UNORM: + return GEN6_ZFORMAT_D24_UNORM_X8_UINT; + case PIPE_FORMAT_Z16_UNORM: + return GEN6_ZFORMAT_D16_UNORM; + default: + assert(!"unknown depth format"); + return GEN6_ZFORMAT_D32_FLOAT; + } + } else { + switch (img->format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT; + case PIPE_FORMAT_Z32_FLOAT: + return GEN6_ZFORMAT_D32_FLOAT; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return GEN6_ZFORMAT_D24_UNORM_S8_UINT; + case PIPE_FORMAT_Z24X8_UNORM: + return GEN6_ZFORMAT_D24_UNORM_X8_UINT; + case PIPE_FORMAT_Z16_UNORM: + return GEN6_ZFORMAT_D16_UNORM; + default: + assert(!"unknown depth format"); + return GEN6_ZFORMAT_D32_FLOAT; + } + } +} + +static bool +zs_validate_gen6(const struct ilo_dev *dev, + const struct ilo_state_zs_info *info) +{ + const struct ilo_image *img = (info->z_img) ? info->z_img : info->s_img; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 315: + * + * The stencil buffer has a format of S8_UINT, and shares Surface + * Type, Height, Width, and Depth, Minimum Array Element, Render + * Target View Extent, Depth Coordinate Offset X/Y, LOD, and Depth + * Buffer Object Control State fields of the depth buffer. + */ + if (info->z_img == info->s_img) { + assert(info->z_img->target == info->s_img->target && + info->z_img->width0 == info->s_img->width0 && + info->z_img->height0 == info->s_img->height0 && + info->z_img->depth0 == info->s_img->depth0); + } + + assert(info->level < img->level_count); + assert(img->bo_stride); + + if (info->hiz_enable) { + assert(info->z_img && + ilo_image_can_enable_aux(info->z_img, info->level)); + } + + if (info->is_cube_map) { + assert(get_gen6_surface_type(dev, img) == GEN6_SURFTYPE_2D); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 323: + * + * "For cube maps, Width must be set equal to Height." 
+ */ + assert(img->width0 == img->height0); + } + + if (info->z_img) + assert(info->z_img->tiling == GEN6_TILING_Y); + if (info->s_img) + assert(info->s_img->tiling == GEN8_TILING_W); + + return true; +} + +static void +get_gen6_max_extent(const struct ilo_dev *dev, + const struct ilo_image *img, + uint16_t *max_w, uint16_t *max_h) +{ + const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192; + + ILO_DEV_ASSERT(dev, 6, 8); + + switch (get_gen6_surface_type(dev, img)) { + case GEN6_SURFTYPE_1D: + *max_w = max_size; + *max_h = 1; + break; + case GEN6_SURFTYPE_2D: + *max_w = max_size; + *max_h = max_size; + break; + case GEN6_SURFTYPE_3D: + *max_w = 2048; + *max_h = 2048; + break; + default: + assert(!"invalid surface type"); + *max_w = 1; + *max_h = 1; + break; + } +} + +static void +get_gen6_hiz_alignments(const struct ilo_dev *dev, + const struct ilo_image *img, + uint16_t *align_w, uint16_t *align_h) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 313: + * + * "A rectangle primitive representing the clear area is delivered. The + * primitive must adhere to the following restrictions on size: + * + * - If Number of Multisamples is NUMSAMPLES_1, the rectangle must be + * aligned to an 8x4 pixel block relative to the upper left corner + * of the depth buffer, and contain an integer number of these pixel + * blocks, and all 8x4 pixels must be lit. 
+ * - If Number of Multisamples is NUMSAMPLES_4, the rectangle must be + * aligned to a 4x2 pixel block (8x4 sample block) relative to the + * upper left corner of the depth buffer, and contain an integer + * number of these pixel blocks, and all samples of the 4x2 pixels + * must be lit + * - If Number of Multisamples is NUMSAMPLES_8, the rectangle must be + * aligned to a 2x2 pixel block (8x4 sample block) relative to the + * upper left corner of the depth buffer, and contain an integer + * number of these pixel blocks, and all samples of the 2x2 pixels + * must be list." + * + * Experiments on Gen7.5 show that HiZ resolve also requires the rectangle + * to be aligned to 8x4 sample blocks. But to be on the safe side, we + * always require a level to be aligned when HiZ is enabled. + */ + switch (img->sample_count) { + case 1: + *align_w = 8; + *align_h = 4; + break; + case 2: + *align_w = 4; + *align_h = 4; + break; + case 4: + *align_w = 4; + *align_h = 2; + break; + case 8: + *align_w = 2; + *align_h = 2; + break; + case 16: + *align_w = 2; + *align_h = 1; + break; + default: + assert(!"unknown sample count"); + *align_w = 1; + *align_h = 1; + break; + } +} + +static bool +zs_get_gen6_depth_extent(const struct ilo_dev *dev, + const struct ilo_state_zs_info *info, + uint16_t *width, uint16_t *height) +{ + const struct ilo_image *img = (info->z_img) ? info->z_img : info->s_img; + uint16_t w, h, max_w, max_h; + + ILO_DEV_ASSERT(dev, 6, 8); + + w = img->width0; + h = img->height0; + + if (info->hiz_enable) { + uint16_t align_w, align_h; + + get_gen6_hiz_alignments(dev, info->z_img, &align_w, &align_h); + + /* + * We want to force 8x4 alignment, but we can do so only for level 0 and + * only when it is padded. ilo_image should know all these. 
+ */ + if (info->level) + assert(w % align_w == 0 && h % align_h == 0); + + w = align(w, align_w); + h = align(h, align_h); + } + + get_gen6_max_extent(dev, img, &max_w, &max_h); + assert(w && h && w <= max_w && h <= max_h); + + *width = w - 1; + *height = h - 1; + + return true; +} + +static bool +zs_get_gen6_depth_slices(const struct ilo_dev *dev, + const struct ilo_state_zs_info *info, + uint16_t *depth, uint16_t *min_array_elem, + uint16_t *rt_view_extent) +{ + const struct ilo_image *img = (info->z_img) ? info->z_img : info->s_img; + uint16_t max_slice, d; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 325: + * + * "This field (Depth) specifies the total number of levels for a + * volume texture or the number of array elements allowed to be + * accessed starting at the Minimum Array Element for arrayed + * surfaces. If the volume texture is MIP-mapped, this field specifies + * the depth of the base MIP level." + */ + switch (get_gen6_surface_type(dev, img)) { + case GEN6_SURFTYPE_1D: + case GEN6_SURFTYPE_2D: + max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512; + + assert(img->array_size <= max_slice); + max_slice = img->array_size; + + d = info->slice_count; + if (info->is_cube_map) { + /* + * Minumum Array Element and Depth must be 0; Render Target View + * Extent is ignored. 
+ */ + if (info->slice_base || d != 6) { + ilo_warn("no cube array dpeth buffer\n"); + return false; + } + + d /= 6; + } + break; + case GEN6_SURFTYPE_3D: + max_slice = 2048; + + assert(img->depth0 <= max_slice); + max_slice = u_minify(img->depth0, info->level); + + d = img->depth0; + break; + default: + assert(!"invalid surface type"); + return false; + break; + } + + if (!info->slice_count || + info->slice_base + info->slice_count > max_slice) { + ilo_warn("invalid slice range\n"); + return false; + } + + assert(d); + *depth = d - 1; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 325: + * + * "For 1D and 2D Surfaces: + * This field (Minimum Array Element) indicates the minimum array + * element that can be accessed as part of this surface. The delivered + * array index is added to this field before being used to address the + * surface. + * + * For 3D Surfaces: + * This field indicates the minimum `R' coordinate on the LOD + * currently being rendered to. This field is added to the delivered + * array index before it is used to address the surface. + * + * For Other Surfaces: + * This field is ignored." + */ + *min_array_elem = info->slice_base; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 326: + * + * "For 3D Surfaces: + * This field (Render Target View Extent) indicates the extent of the + * accessible `R' coordinates minus 1 on the LOD currently being + * rendered to. + * + * For 1D and 2D Surfaces: + * This field must be set to the same value as the Depth field. + * + * For Other Surfaces: + * This field is ignored." 
+ */ + *rt_view_extent = info->slice_count - 1; + + return true; +} + +static bool +zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs, + const struct ilo_dev *dev, + const struct ilo_state_zs_info *info) +{ + uint16_t width, height, depth, array_base, view_extent; + enum gen_surface_type type; + enum gen_depth_format format; + uint32_t dw1, dw2, dw3, dw4; + + ILO_DEV_ASSERT(dev, 6, 6); + + if (!zs_validate_gen6(dev, info) || + !zs_get_gen6_depth_extent(dev, info, &width, &height) || + !zs_get_gen6_depth_slices(dev, info, &depth, &array_base, + &view_extent)) + return false; + + type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE : + (info->z_img) ? get_gen6_surface_type(dev, info->z_img) : + get_gen6_surface_type(dev, info->s_img); + + format = (info->z_img) ? get_gen6_depth_format(dev, info->z_img) : + GEN6_ZFORMAT_D32_FLOAT; + + /* + * From the Ironlake PRM, volume 2 part 1, page 330: + * + * "If this field (Separate Stencil Buffer Enable) is disabled, the + * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 321: + * + * "[DevSNB]: This field (Separate Stencil Buffer Enable) must be set + * to the same value (enabled or disabled) as Hierarchical Depth + * Buffer Enable." 
+ */ + if (!info->hiz_enable && format == GEN6_ZFORMAT_D24_UNORM_X8_UINT) + format = GEN6_ZFORMAT_D24_UNORM_S8_UINT; + + /* info->z_readonly and info->s_readonly are ignored on Gen6 */ + dw1 = type << GEN6_DEPTH_DW1_TYPE__SHIFT | + GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT | + format << GEN6_DEPTH_DW1_FORMAT__SHIFT; + + if (info->z_img) + dw1 |= (info->z_img->bo_stride - 1) << GEN6_DEPTH_DW1_PITCH__SHIFT; + + if (info->hiz_enable || !info->z_img) { + dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE | + GEN6_DEPTH_DW1_SEPARATE_STENCIL; + } + + dw2 = 0; + dw3 = height << GEN6_DEPTH_DW3_HEIGHT__SHIFT | + width << GEN6_DEPTH_DW3_WIDTH__SHIFT | + info->level << GEN6_DEPTH_DW3_LOD__SHIFT | + GEN6_DEPTH_DW3_MIPLAYOUT_BELOW; + dw4 = depth << GEN6_DEPTH_DW4_DEPTH__SHIFT | + array_base << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT | + view_extent << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(zs->depth) >= 5); + zs->depth[0] = dw1; + zs->depth[1] = dw2; + zs->depth[2] = dw3; + zs->depth[3] = dw4; + zs->depth[4] = 0; + + zs->depth_format = format; + + return true; +} + +static bool +zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs, + const struct ilo_dev *dev, + const struct ilo_state_zs_info *info) +{ + enum gen_surface_type type; + enum gen_depth_format format; + uint16_t width, height, depth; + uint16_t array_base, view_extent; + uint32_t dw1, dw2, dw3, dw4, dw6; + + ILO_DEV_ASSERT(dev, 7, 8); + + if (!zs_validate_gen6(dev, info) || + !zs_get_gen6_depth_extent(dev, info, &width, &height) || + !zs_get_gen6_depth_slices(dev, info, &depth, &array_base, + &view_extent)) + return false; + + type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE : + (info->z_img) ? get_gen6_surface_type(dev, info->z_img) : + get_gen6_surface_type(dev, info->s_img); + + format = (info->z_img) ? 
get_gen6_depth_format(dev, info->z_img) : + GEN6_ZFORMAT_D32_FLOAT; + + dw1 = type << GEN7_DEPTH_DW1_TYPE__SHIFT | + format << GEN7_DEPTH_DW1_FORMAT__SHIFT; + + if (info->z_img) { + if (!info->z_readonly) + dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE; + if (info->hiz_enable) + dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE; + + dw1 |= (info->z_img->bo_stride - 1) << GEN7_DEPTH_DW1_PITCH__SHIFT; + } + + if (info->s_img && !info->s_readonly) + dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE; + + dw2 = 0; + dw3 = height << GEN7_DEPTH_DW3_HEIGHT__SHIFT | + width << GEN7_DEPTH_DW3_WIDTH__SHIFT | + info->level << GEN7_DEPTH_DW3_LOD__SHIFT; + dw4 = depth << GEN7_DEPTH_DW4_DEPTH__SHIFT | + array_base << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT; + dw6 = view_extent << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(8) && info->z_img) { + assert(info->z_img->walk_layer_height % 4 == 0); + /* note that DW is off-by-one for Gen8+ */ + dw6 |= (info->z_img->walk_layer_height / 4) << + GEN8_DEPTH_DW7_QPITCH__SHIFT; + } + + STATIC_ASSERT(ARRAY_SIZE(zs->depth) >= 5); + zs->depth[0] = dw1; + zs->depth[1] = dw2; + zs->depth[2] = dw3; + zs->depth[3] = dw4; + zs->depth[4] = dw6; + + zs->depth_format = format; + + return true; +} + +static bool +zs_set_gen6_null_3DSTATE_STENCIL_BUFFER(struct ilo_state_zs *zs, + const struct ilo_dev *dev) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + STATIC_ASSERT(ARRAY_SIZE(zs->stencil) >= 3); + zs->stencil[0] = 0; + zs->stencil[1] = 0; + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + zs->stencil[2] = 0; + + return true; +} + +static bool +zs_set_gen6_3DSTATE_STENCIL_BUFFER(struct ilo_state_zs *zs, + const struct ilo_dev *dev, + const struct ilo_state_zs_info *info) +{ + const struct ilo_image *img = info->s_img; + uint32_t dw1, dw2; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(img->bo_stride); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 329: + * + * "The pitch must be set to 2x the value computed based on width, as + * the stencil buffer is stored with 
two rows interleaved." + * + * For Gen7+, we still double the stride because we did not double the + * slice widths when initializing ilo_image. + */ + dw1 = (img->bo_stride * 2 - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE; + + dw2 = 0; + /* offset to the level as Gen6 does not support mipmapped stencil */ + if (ilo_dev_gen(dev) == ILO_GEN(6)) { + unsigned x, y; + + ilo_image_get_slice_pos(img, info->level, 0, &x, &y); + ilo_image_pos_to_mem(img, x, y, &x, &y); + dw2 |= ilo_image_mem_to_raw(img, x, y); + } + + STATIC_ASSERT(ARRAY_SIZE(zs->stencil) >= 3); + zs->stencil[0] = dw1; + zs->stencil[1] = dw2; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + uint32_t dw4; + + assert(img->walk_layer_height % 4 == 0); + dw4 = (img->walk_layer_height / 4) << GEN8_STENCIL_DW4_QPITCH__SHIFT; + + zs->stencil[2] = dw4; + } + + return true; +} + +static bool +zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_state_zs *zs, + const struct ilo_dev *dev) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + STATIC_ASSERT(ARRAY_SIZE(zs->hiz) >= 3); + zs->hiz[0] = 0; + zs->hiz[1] = 0; + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + zs->hiz[2] = 0; + + return true; +} + +static bool +zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_state_zs *zs, + const struct ilo_dev *dev, + const struct ilo_state_zs_info *info) +{ + const struct ilo_image *img = info->z_img; + uint32_t dw1, dw2; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(img->aux.bo_stride); + + dw1 = (img->aux.bo_stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT; + + dw2 = 0; + /* offset to the level as Gen6 does not support mipmapped HiZ */ + if (ilo_dev_gen(dev) == ILO_GEN(6)) + dw2 |= img->aux.walk_lod_offsets[info->level]; + + STATIC_ASSERT(ARRAY_SIZE(zs->hiz) >= 3); + zs->hiz[0] = dw1; + zs->hiz[1] = dw2; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + uint32_t dw4; + + assert(img->aux.walk_layer_height % 4 == 0); + dw4 = (img->aux.walk_layer_height / 4) << GEN8_HIZ_DW4_QPITCH__SHIFT; + 
+ zs->hiz[2] = dw4; + } + + return true; +} + +bool +ilo_state_zs_init(struct ilo_state_zs *zs, const struct ilo_dev *dev, + const struct ilo_state_zs_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(zs, sizeof(*zs))); + + if (info->z_img || info->s_img) { + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + ret &= zs_set_gen7_3DSTATE_DEPTH_BUFFER(zs, dev, info); + else + ret &= zs_set_gen6_3DSTATE_DEPTH_BUFFER(zs, dev, info); + } else { + ret &= zs_set_gen6_null_3DSTATE_DEPTH_BUFFER(zs, dev); + } + + if (info->s_img) + ret &= zs_set_gen6_3DSTATE_STENCIL_BUFFER(zs, dev, info); + else + ret &= zs_set_gen6_null_3DSTATE_STENCIL_BUFFER(zs, dev); + + if (info->z_img && info->hiz_enable) + ret &= zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER(zs, dev, info); + else + ret &= zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev); + + zs->z_readonly = info->z_readonly; + zs->s_readonly = info->s_readonly; + + assert(ret); + + return ret; +} + +bool +ilo_state_zs_init_for_null(struct ilo_state_zs *zs, + const struct ilo_dev *dev) +{ + struct ilo_state_zs_info info; + + memset(&info, 0, sizeof(info)); + + return ilo_state_zs_init(zs, dev, &info); +} + +bool +ilo_state_zs_disable_hiz(struct ilo_state_zs *zs, + const struct ilo_dev *dev) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * Separate stencil must be disabled simultaneously on Gen6. We can make + * it work when there is no stencil buffer, but it is probably not worth + * it. + */ + assert(ilo_dev_gen(dev) >= ILO_GEN(7)); + + zs->depth[0] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE; + zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev); + + return true; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.h b/src/gallium/drivers/ilo/core/ilo_state_zs.h new file mode 100644 index 00000000000..98212daf74f --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_zs.h @@ -0,0 +1,93 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_ZS_H +#define ILO_STATE_ZS_H + +#include "genhw/genhw.h" +#include "intel_winsys.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +struct ilo_image; + +struct ilo_state_zs_info { + /* both are optional */ + const struct ilo_image *z_img; + const struct ilo_image *s_img; + + /* ignored prior to Gen7 */ + bool z_readonly; + bool s_readonly; + + bool hiz_enable; + bool is_cube_map; + + uint8_t level; + uint16_t slice_base; + uint16_t slice_count; +}; + +struct ilo_state_zs { + uint32_t depth[5]; + uint32_t stencil[3]; + uint32_t hiz[3]; + + /* TODO move this to ilo_image */ + enum gen_depth_format depth_format; + + bool z_readonly; + bool s_readonly; + + /* managed by users */ + struct intel_bo *depth_bo; + struct intel_bo *stencil_bo; + struct intel_bo *hiz_bo; +}; + +bool +ilo_state_zs_init(struct ilo_state_zs *zs, + const struct ilo_dev *dev, + const struct ilo_state_zs_info *info); + +bool +ilo_state_zs_init_for_null(struct ilo_state_zs *zs, + const struct ilo_dev *dev); + +bool +ilo_state_zs_disable_hiz(struct ilo_state_zs *zs, + const struct ilo_dev *dev); + +static inline enum gen_depth_format +ilo_state_zs_get_depth_format(const struct ilo_state_zs *zs, + const struct ilo_dev *dev) +{ + return zs->depth_format; +} + +#endif /* ILO_STATE_ZS_H */ From f5f2007322b5468aa3025e7e259b4c50c7a7a0bd Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 14 May 2015 09:46:42 +0800 Subject: [PATCH 609/834] ilo: add ilo_state_surface We want to replace ilo_view_surface with ilo_state_surface. 
--- src/gallium/drivers/ilo/Makefile.sources | 2 + .../drivers/ilo/core/ilo_state_surface.c | 1179 +++++++++++++++++ .../drivers/ilo/core/ilo_state_surface.h | 116 ++ 3 files changed, 1297 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_surface.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_surface.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 943e29113ae..587850eafdb 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -23,6 +23,8 @@ C_SOURCES := \ core/ilo_state_3d.h \ core/ilo_state_3d_bottom.c \ core/ilo_state_3d_top.c \ + core/ilo_state_surface.c \ + core/ilo_state_surface.h \ core/ilo_state_zs.c \ core/ilo_state_zs.h \ core/intel_winsys.h \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.c b/src/gallium/drivers/ilo/core/ilo_state_surface.c new file mode 100644 index 00000000000..be7225b7bc4 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_surface.c @@ -0,0 +1,1179 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_buffer.h" +#include "ilo_image.h" +#include "ilo_state_surface.h" + +static bool +surface_set_gen6_null_SURFACE_STATE(struct ilo_state_surface *surf, + const struct ilo_dev *dev) +{ + uint32_t dw0, dw3; + + ILO_DEV_ASSERT(dev, 6, 6); + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 71: + * + * "All of the remaining fields in surface state are ignored for null + * surfaces, with the following exceptions: + * + * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the + * depth buffer's corresponding state for all render target + * surfaces, including null. + * - Surface Format must be R8G8B8A8_UNORM." + * + * From the Sandy Bridge PRM, volume 4 part 1, page 82: + * + * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must + * be true" + * + * Note that we ignore the first exception for all surface types. 
+ */ + dw0 = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT | + GEN6_FORMAT_R8G8B8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT; + dw3 = GEN6_TILING_X << GEN6_SURFACE_DW3_TILING__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 6); + surf->surface[0] = dw0; + surf->surface[1] = 0; + surf->surface[2] = 0; + surf->surface[3] = dw3; + surf->surface[4] = 0; + surf->surface[5] = 0; + + return true; +} + +/* + * Set up a null SURFACE_STATE (SURFTYPE_NULL, R8G8B8A8_UNORM, X-tiled) for + * Gen7 through Gen8. DW0 carries the type, format and tiling; the remaining + * DWords of the state are zeroed. Always returns true. + */ +static bool +surface_set_gen7_null_SURFACE_STATE(struct ilo_state_surface *surf, + const struct ilo_dev *dev) +{ + uint32_t dw0; + + ILO_DEV_ASSERT(dev, 7, 8); + + dw0 = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT | + GEN6_FORMAT_R8G8B8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT; + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + dw0 |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT; + else + dw0 |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 13); + surf->surface[0] = dw0; + /* + * Zero DW1 up to the last DWord of the state (13 DWords on Gen8+, 8 + * otherwise). The "- 1" must apply to the DWord count, not to the byte + * count, or the memset would run 3 bytes past the last DWord. + */ + memset(&surf->surface[1], 0, sizeof(uint32_t) * + (((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 
13 : 8) - 1)); + + return true; +} + +static bool +surface_validate_gen6_buffer(const struct ilo_dev *dev, + const struct ilo_state_surface_buffer_info *info) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* SVB writes are Gen6-only */ + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + assert(info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB); + + if (info->offset + info->size > info->buf->bo_size) { + ilo_warn("invalid buffer range\n"); + return false; + } + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 81: + * + * "For surfaces of type SURFTYPE_BUFFER: [0,2047] -> [1B, 2048B] + * For surfaces of type SURFTYPE_STRBUF: [0,2047] -> [1B, 2048B]" + */ + if (!info->struct_size || info->struct_size > 2048) { + ilo_warn("invalid buffer struct size\n"); + return false; + } + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 68: + * + * "The Base Address for linear render target surfaces and surfaces + * accessed with the typed surface read/write data port messages must + * be element-size aligned, for non-YUV surface formats, or a multiple + * of 2 element-sizes for YUV surface formats. Other linear surfaces + * have no alignment requirements (byte alignment is sufficient)." + * + * "Certain message types used to access surfaces have more stringent + * alignment requirements. Please refer to the specific message + * documentation for additional restrictions." + * + * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, and 237: + * + * "the surface base address must be OWord aligned" + * + * for OWord Block Read/Write, Unaligned OWord Block Read, and OWord Dual + * Block Read/Write. + * + * From the Ivy Bridge PRM, volume 4 part 1, page 246 and 249: + * + * "The surface base address must be DWord aligned" + * + * for DWord Scattered Read/Write and Byte Scattered Read/Write. + * + * We have to rely on users to correctly set info->struct_size here. DWord + * Scattered Read/Write has conflicting pitch and alignment, but we do not + * use them yet so we are fine. 
+ * + * It is unclear if sampling engine surfaces require aligned offsets. + */ + if (info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB) { + assert(info->struct_size % info->format_size == 0); + + if (info->offset % info->struct_size) { + ilo_warn("bad buffer offset\n"); + return false; + } + } + + if (info->format == GEN6_FORMAT_RAW) { + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 97: + * + * ""RAW" is supported only with buffers and structured buffers + * accessed via the untyped surface read/write and untyped atomic + * operation messages, which do not have a column in the table." + * + * We do not have a specific access mode for untyped messages. + */ + assert(info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED); + + /* + * Nothing is said about Untyped* messages, but I guess they require the + * base address to be DWord aligned. + */ + if (info->offset % 4) { + ilo_warn("bad RAW buffer offset\n"); + return false; + } + + if (info->struct_size > 1) { + /* no STRBUF on Gen6 */ + if (ilo_dev_gen(dev) == ILO_GEN(6)) { + ilo_warn("no STRBUF support\n"); + return false; + } + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 70: + * + * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the + * pitch must be a multiple of 4 bytes." + */ + if (info->struct_size % 4) { + ilo_warn("bad STRBUF pitch\n"); + return false; + } + } + } + + return true; +} + +static bool +surface_get_gen6_buffer_struct_count(const struct ilo_dev *dev, + const struct ilo_state_surface_buffer_info *info, + uint32_t *count) +{ + uint32_t max_struct, c; + + ILO_DEV_ASSERT(dev, 6, 8); + + c = info->size / info->struct_size; + if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB && + info->format_size < info->size - info->struct_size * c) + c++; + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 77: + * + * "For buffer surfaces, the number of entries in the buffer ranges + * from 1 to 2^27." 
+ * + * From the Ivy Bridge PRM, volume 4 part 1, page 68: + * + * "For typed buffer and structured buffer surfaces, the number of + * entries in the buffer ranges from 1 to 2^27. For raw buffer + * surfaces, the number of entries in the buffer is the number of + * bytes which can range from 1 to 2^30." + * + * From the Ivy Bridge PRM, volume 4 part 1, page 69: + * + * For SURFTYPE_BUFFER: The low two bits of this field (Width) must be + * 11 if the Surface Format is RAW (the size of the buffer must be a + * multiple of 4 bytes)." + */ + max_struct = 1 << 27; + if (info->format == GEN6_FORMAT_RAW && info->struct_size == 1) { + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + max_struct = 1 << 30; + + c &= ~3; + } + + if (!c || c > max_struct) { + ilo_warn("too many or zero buffer structs\n"); + return false; + } + + *count = c - 1; + + return true; +} + +static bool +surface_set_gen6_buffer_SURFACE_STATE(struct ilo_state_surface *surf, + const struct ilo_dev *dev, + const struct ilo_state_surface_buffer_info *info) +{ + uint32_t dw0, dw1, dw2, dw3; + uint32_t struct_count; + int width, height, depth; + + ILO_DEV_ASSERT(dev, 6, 6); + + if (!surface_validate_gen6_buffer(dev, info) || + !surface_get_gen6_buffer_struct_count(dev, info, &struct_count)) + return false; + + /* bits [6:0] */ + width = (struct_count & 0x0000007f); + /* bits [19:7] */ + height = (struct_count & 0x000fff80) >> 7; + /* bits [26:20] */ + depth = (struct_count & 0x07f00000) >> 20; + + dw0 = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT | + info->format << GEN6_SURFACE_DW0_FORMAT__SHIFT; + dw1 = info->offset; + dw2 = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT | + width << GEN6_SURFACE_DW2_WIDTH__SHIFT; + dw3 = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT | + (info->struct_size - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 6); + surf->surface[0] = dw0; + surf->surface[1] = dw1; + surf->surface[2] = dw2; + surf->surface[3] = dw3; + surf->surface[4] = 0; + 
surf->surface[5] = 0; + + surf->type = GEN6_SURFTYPE_BUFFER; + surf->min_lod = 0; + surf->mip_count = 0; + + return true; +} + +static bool +surface_set_gen7_buffer_SURFACE_STATE(struct ilo_state_surface *surf, + const struct ilo_dev *dev, + const struct ilo_state_surface_buffer_info *info) +{ + uint32_t dw0, dw1, dw2, dw3, dw7; + enum gen_surface_type type; + uint32_t struct_count; + int width, height, depth; + + ILO_DEV_ASSERT(dev, 7, 8); + + if (!surface_validate_gen6_buffer(dev, info) || + !surface_get_gen6_buffer_struct_count(dev, info, &struct_count)) + return false; + + type = (info->format == GEN6_FORMAT_RAW && info->struct_size > 1) ? + GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER; + + /* bits [6:0] */ + width = (struct_count & 0x0000007f); + /* bits [20:7] */ + height = (struct_count & 0x001fff80) >> 7; + /* bits [30:21] */ + depth = (struct_count & 0x7fe00000) >> 21; + + dw0 = type << GEN7_SURFACE_DW0_TYPE__SHIFT | + info->format << GEN7_SURFACE_DW0_FORMAT__SHIFT; + dw1 = (ilo_dev_gen(dev) >= ILO_GEN(8)) ? 
0 : info->offset; + dw2 = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) | + GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH); + dw3 = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) | + GEN_SHIFT32(info->struct_size - 1, GEN7_SURFACE_DW3_PITCH); + + dw7 = 0; + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + dw7 |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | + GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | + GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | + GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); + } + + STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 13); + surf->surface[0] = dw0; + surf->surface[1] = dw1; + surf->surface[2] = dw2; + surf->surface[3] = dw3; + surf->surface[4] = 0; + surf->surface[5] = 0; + surf->surface[6] = 0; + surf->surface[7] = dw7; + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + surf->surface[8] = info->offset; + surf->surface[9] = 0; + surf->surface[10] = 0; + surf->surface[11] = 0; + surf->surface[12] = 0; + } + + surf->type = type; + surf->min_lod = 0; + surf->mip_count = 0; + + return true; +} + +static enum gen_surface_type +get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + switch (img->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return GEN6_SURFTYPE_1D; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE_ARRAY: + return GEN6_SURFTYPE_2D; + case PIPE_TEXTURE_3D: + return GEN6_SURFTYPE_3D; + default: + assert(!"unknown texture target"); + return GEN6_SURFTYPE_NULL; + } +} + +static bool +surface_validate_gen6_image(const struct ilo_dev *dev, + const struct ilo_state_surface_image_info *info) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + switch (info->access) { + case ILO_STATE_SURFACE_ACCESS_SAMPLER: + case ILO_STATE_SURFACE_ACCESS_DP_RENDER: + break; + case ILO_STATE_SURFACE_ACCESS_DP_TYPED: + assert(ilo_dev_gen(dev) >= ILO_GEN(7)); + break; + default: + assert(!"unsupported 
surface access"); + break; + } + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 78: + * + * "For surface types other than SURFTYPE_BUFFER, the Width specified + * by this field must be less than or equal to the surface pitch + * (specified in bytes via the Surface Pitch field)." + */ + assert(info->img->bo_stride && info->img->bo_stride <= 512 * 1024 && + info->img->width0 <= info->img->bo_stride); + + if (info->is_cube_map) { + assert(get_gen6_surface_type(dev, info->img) == GEN6_SURFTYPE_2D); + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 78: + * + * "For cube maps, Width must be set equal to the Height." + */ + assert(info->img->width0 == info->img->height0); + } + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 72: + * + * "Tile Walk TILEWALK_YMAJOR is UNDEFINED for render target formats + * that have 128 bits-per-element (BPE)." + * + * "If Number of Multisamples is set to a value other than + * MULTISAMPLECOUNT_1, this field cannot be set to the following + * formats: + * + * - any format with greater than 64 bits per element + * - any compressed texture format (BC*) + * - any YCRCB* format" + * + * From the Ivy Bridge PRM, volume 4 part 1, page 63: + * + * If Number of Multisamples is set to a value other than + * MULTISAMPLECOUNT_1, this field cannot be set to the following + * formats: any format with greater than 64 bits per element, if + * Number of Multisamples is MULTISAMPLECOUNT_8, any compressed + * texture format (BC*), and any YCRCB* format. + * + * TODO + */ + + if (ilo_dev_gen(dev) < ILO_GEN(8) && info->img->tiling == GEN8_TILING_W) { + ilo_warn("tiling W is not supported\n"); + return false; + } + + return true; +} + +static void +get_gen6_max_extent(const struct ilo_dev *dev, + const struct ilo_image *img, + uint16_t *max_w, uint16_t *max_h) +{ + const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 
16384 : 8192; + + ILO_DEV_ASSERT(dev, 6, 8); + + switch (get_gen6_surface_type(dev, img)) { + case GEN6_SURFTYPE_1D: + *max_w = max_size; + *max_h = 1; + break; + case GEN6_SURFTYPE_2D: + *max_w = max_size; + *max_h = max_size; + break; + case GEN6_SURFTYPE_3D: + *max_w = 2048; + *max_h = 2048; + break; + default: + assert(!"invalid surface type"); + *max_w = 1; + *max_h = 1; + break; + } +} + +static bool +surface_get_gen6_image_extent(const struct ilo_dev *dev, + const struct ilo_state_surface_image_info *info, + uint16_t *width, uint16_t *height) +{ + uint16_t w, h, max_w, max_h; + + ILO_DEV_ASSERT(dev, 6, 8); + + w = info->img->width0; + h = info->img->height0; + + get_gen6_max_extent(dev, info->img, &max_w, &max_h); + assert(w && h && w <= max_w && h <= max_h); + + *width = w - 1; + *height = h - 1; + + return true; +} + +static bool +surface_get_gen6_image_slices(const struct ilo_dev *dev, + const struct ilo_state_surface_image_info *info, + uint16_t *depth, uint16_t *min_array_elem, + uint16_t *rt_view_extent) +{ + uint16_t max_slice, d; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 63: + * + * "If this field (Surface Array) is enabled, the Surface Type must be + * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is + * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or + * SURFTYPE_CUBE, the Depth field must be set to zero." + * + * From the Ivy Bridge PRM, volume 4 part 1, page 69: + * + * "This field (Depth) specifies the total number of levels for a + * volume texture or the number of array elements allowed to be + * accessed starting at the Minimum Array Element for arrayed + * surfaces. If the volume texture is MIP-mapped, this field + * specifies the depth of the base MIP level." + * + * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of this + * field is [0,340], indicating the number of cube array elements + * (equal to the number of underlying 2D array elements divided by 6). 
+ * For other surfaces, this field must be zero." + * + * "Errata: For SURFTYPE_CUBE sampling engine surfaces, the range of + * this field is limited to [0,85]. + * + * Errata: If Surface Array is enabled, and Depth is between 1024 and + * 2047, an incorrect array slice may be accessed if the requested + * array index in the message is greater than or equal to 4096." + * + * The errata are for Gen7-specific, and they limit the number of useable + * layers to (86 * 6), about 512. + */ + + switch (get_gen6_surface_type(dev, info->img)) { + case GEN6_SURFTYPE_1D: + case GEN6_SURFTYPE_2D: + max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 2048 : 512; + + assert(info->img->array_size <= max_slice); + max_slice = info->img->array_size; + + d = info->slice_count; + if (info->is_cube_map) { + if (info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) { + if (!d || d % 6) { + ilo_warn("invalid cube slice count\n"); + return false; + } + + if (ilo_dev_gen(dev) == ILO_GEN(7) && d > 86 * 6) { + ilo_warn("cube slice count exceeds Gen7 limit\n"); + return false; + } + } else { + /* + * Minumum Array Element and Depth must be 0; Render Target View + * Extent is ignored. 
+ */ + if (info->slice_base || d != 6) { + ilo_warn("no cube RT array support in data port\n"); + return false; + } + } + + d /= 6; + } + + if (!info->is_array && d > 1) { + ilo_warn("non-array surface with non-zero depth\n"); + return false; + } + break; + case GEN6_SURFTYPE_3D: + max_slice = 2048; + + assert(info->img->depth0 <= max_slice); + max_slice = u_minify(info->img->depth0, info->level_base); + + d = info->img->depth0; + + if (info->is_array) { + ilo_warn("3D surfaces cannot be arrays\n"); + return false; + } + break; + default: + assert(!"invalid surface type"); + return false; + break; + } + + if (!info->slice_count || + info->slice_base + info->slice_count > max_slice) { + ilo_warn("invalid slice range\n"); + return false; + } + + assert(d); + *depth = d - 1; + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 84: + * + * "For Sampling Engine and Render Target 1D and 2D Surfaces: + * This field (Minimum Array Element) indicates the minimum array + * element that can be accessed as part of this surface. This field + * is added to the delivered array index before it is used to address + * the surface. + * + * For Render Target 3D Surfaces: + * This field indicates the minimum `R' coordinate on the LOD + * currently being rendered to. This field is added to the delivered + * array index before it is used to address the surface. + * + * For Sampling Engine Cube Surfaces on [DevSNB+] only: + * This field indicates the minimum array element in the underlying 2D + * surface array that can be accessed as part of this surface (the + * cube array index is multipled by 6 to compute this value, although + * this field is not restricted to only multiples of 6). This field is + * added to the delivered array index before it is used to address the + * surface. + * + * For Other Surfaces: + * This field must be set to zero." + * + * On Gen7+, typed sufaces are treated like sampling engine 1D and 2D + * surfaces. 
+ */ + *min_array_elem = info->slice_base; + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 84: + * + * "For Render Target 3D Surfaces: + * This field (Render Target View Extent) indicates the extent of the + * accessible `R' coordinates minus 1 on the LOD currently being + * rendered to. + * + * For Render Target 1D and 2D Surfaces: + * This field must be set to the same value as the Depth field. + * + * For Other Surfaces: + * This field is ignored." + */ + *rt_view_extent = info->slice_count - 1; + + return true; +} + +static bool +surface_get_gen6_image_levels(const struct ilo_dev *dev, + const struct ilo_state_surface_image_info *info, + uint8_t *min_lod, uint8_t *mip_count) +{ + uint8_t max_level = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 15 : 14; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(info->img->level_count <= max_level); + max_level = info->img->level_count; + + if (!info->level_count || + info->level_base + info->level_count > max_level) { + ilo_warn("invalid level range\n"); + return false; + } + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 79: + * + * "For Sampling Engine Surfaces: + * This field (MIP Count / LOD) indicates the number of MIP levels + * allowed to be accessed starting at Surface Min LOD, which must be + * less than or equal to the number of MIP levels actually stored in + * memory for this surface. + * + * Force the mip map access to be between the mipmap specified by the + * integer bits of the Min LOD and the ceiling of the value specified + * here. + * + * For Render Target Surfaces: + * This field defines the MIP level that is currently being rendered + * into. This is the absolute MIP level on the surface and is not + * relative to the Surface Min LOD field, which is ignored for render + * target surfaces. 
+ * + * For Other Surfaces: + * This field is reserved : MBZ" + * + * From the Sandy Bridge PRM, volume 4 part 1, page 83: + * + * "For Sampling Engine Surfaces: + * + * This field (Surface Min LOD) indicates the most detailed LOD that + * can be accessed as part of this surface. This field is added to + * the delivered LOD (sample_l, ld, or resinfo message types) before + * it is used to address the surface. + * + * For Other Surfaces: + * This field is ignored." + * + * On Gen7+, typed surfaces are treated like sampling engine surfaces. + */ + if (info->access == ILO_STATE_SURFACE_ACCESS_DP_RENDER) { + assert(info->level_count == 1); + + *min_lod = 0; + *mip_count = info->level_base; + } else { + *min_lod = info->level_base; + *mip_count = info->level_count - 1; + } + + return true; +} + +/* + * Translate info->img->sample_count into the hardware sample count enum and + * assert that the device is new enough for it. An unknown count asserts and + * falls back to single-sampled. Always returns true. + */ +static bool +surface_get_gen6_image_sample_count(const struct ilo_dev *dev, + const struct ilo_state_surface_image_info *info, + enum gen_sample_count *sample_count) +{ + int min_gen; + + ILO_DEV_ASSERT(dev, 6, 8); + + switch (info->img->sample_count) { + case 1: + *sample_count = GEN6_NUMSAMPLES_1; + min_gen = ILO_GEN(6); + break; + case 2: + *sample_count = GEN8_NUMSAMPLES_2; + min_gen = ILO_GEN(8); + break; + case 4: + *sample_count = GEN6_NUMSAMPLES_4; + min_gen = ILO_GEN(6); + break; + case 8: + *sample_count = GEN7_NUMSAMPLES_8; + min_gen = ILO_GEN(7); + break; + case 16: + *sample_count = GEN8_NUMSAMPLES_16; + min_gen = ILO_GEN(8); + break; + default: + assert(!"invalid sample count"); + /* keep min_gen defined for the check below; matches the 1x fallback */ + *sample_count = GEN6_NUMSAMPLES_1; + min_gen = ILO_GEN(6); + break; + } + + assert(ilo_dev_gen(dev) >= min_gen); + + return true; +} + +static bool +surface_get_gen6_image_alignments(const struct ilo_dev *dev, + const struct ilo_state_surface_image_info *info, + uint32_t *alignments) +{ + uint32_t a = 0; + bool err = false; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + switch (info->img->align_i) { + case 4: + a |= GEN8_SURFACE_DW0_HALIGN_4; + break; + case 8: + a |= 
GEN8_SURFACE_DW0_HALIGN_8; + break; + case 16: + a |= GEN8_SURFACE_DW0_HALIGN_16; + break; + default: + err = true; + break; + } + + switch (info->img->align_j) { + case 4: + a |= GEN7_SURFACE_DW0_VALIGN_4; + break; + case 8: + a |= GEN8_SURFACE_DW0_VALIGN_8; + break; + case 16: + a |= GEN8_SURFACE_DW0_VALIGN_16; + break; + default: + err = true; + break; + } + } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + switch (info->img->align_i) { + case 4: + a |= GEN7_SURFACE_DW0_HALIGN_4; + break; + case 8: + a |= GEN7_SURFACE_DW0_HALIGN_8; + break; + default: + err = true; + break; + } + + switch (info->img->align_j) { + case 2: + a |= GEN7_SURFACE_DW0_VALIGN_2; + break; + case 4: + a |= GEN7_SURFACE_DW0_VALIGN_4; + break; + default: + err = true; + break; + } + } else { + if (info->img->align_i != 4) + err = true; + + switch (info->img->align_j) { + case 2: + a |= GEN6_SURFACE_DW5_VALIGN_2; + break; + case 4: + a |= GEN6_SURFACE_DW5_VALIGN_4; + break; + default: + err = true; + break; + } + } + + if (err) + assert(!"invalid HALIGN or VALIGN"); + + *alignments = a; + + return true; +} + +static bool +surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf, + const struct ilo_dev *dev, + const struct ilo_state_surface_image_info *info) +{ + uint16_t width, height, depth, array_base, view_extent; + uint8_t min_lod, mip_count; + enum gen_sample_count sample_count; + uint32_t alignments; + enum gen_surface_type type; + uint32_t dw0, dw2, dw3, dw4, dw5; + + ILO_DEV_ASSERT(dev, 6, 6); + + if (!surface_validate_gen6_image(dev, info) || + !surface_get_gen6_image_extent(dev, info, &width, &height) || + !surface_get_gen6_image_slices(dev, info, &depth, &array_base, + &view_extent) || + !surface_get_gen6_image_levels(dev, info, &min_lod, &mip_count) || + !surface_get_gen6_image_sample_count(dev, info, &sample_count) || + !surface_get_gen6_image_alignments(dev, info, &alignments)) + return false; + + /* no ARYSPC_LOD0 */ + assert(info->img->walk != ILO_IMAGE_WALK_LOD); + 
/* no UMS/CMS */ + if (info->img->sample_count > 1) + assert(info->img->interleaved_samples); + + type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE : + get_gen6_surface_type(dev, info->img); + + dw0 = type << GEN6_SURFACE_DW0_TYPE__SHIFT | + info->format << GEN6_SURFACE_DW0_FORMAT__SHIFT | + GEN6_SURFACE_DW0_MIPLAYOUT_BELOW; + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 74: + * + * "CUBE_AVERAGE may only be selected if all of the Cube Face Enable + * fields are equal to one." + * + * From the Sandy Bridge PRM, volume 4 part 1, page 75-76: + * + * "For SURFTYPE_CUBE Surfaces accessed via the Sampling Engine: + * Bits 5:0 of this field (Cube Face Enables) enable the individual + * faces of a cube map. Enabling a face indicates that the face is + * present in the cube map, while disabling it indicates that that + * face is represented by the texture map's border color. Refer to + * Memory Data Formats for the correlation between faces and the cube + * map memory layout. Note that storage for disabled faces must be + * provided. + * + * For other surfaces: + * This field is reserved : MBZ" + * + * "When TEXCOORDMODE_CLAMP is used when accessing a cube map, this + * field must be programmed to 111111b (all faces enabled)." 
+ */ + if (info->is_cube_map && + info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) { + dw0 |= GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_AVERAGE | + GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; + } + + dw2 = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT | + width << GEN6_SURFACE_DW2_WIDTH__SHIFT | + mip_count << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; + + dw3 = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT | + (info->img->bo_stride - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT | + info->img->tiling << GEN6_SURFACE_DW3_TILING__SHIFT; + + dw4 = min_lod << GEN6_SURFACE_DW4_MIN_LOD__SHIFT | + array_base << GEN6_SURFACE_DW4_MIN_ARRAY_ELEMENT__SHIFT | + view_extent << GEN6_SURFACE_DW4_RT_VIEW_EXTENT__SHIFT | + sample_count << GEN6_SURFACE_DW4_MULTISAMPLECOUNT__SHIFT; + + dw5 = alignments; + + STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 6); + surf->surface[0] = dw0; + surf->surface[1] = 0; + surf->surface[2] = dw2; + surf->surface[3] = dw3; + surf->surface[4] = dw4; + surf->surface[5] = dw5; + + surf->type = type; + surf->min_lod = min_lod; + surf->mip_count = mip_count; + + return true; +} + +static bool +surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf, + const struct ilo_dev *dev, + const struct ilo_state_surface_image_info *info) +{ + uint16_t width, height, depth, array_base, view_extent; + uint8_t min_lod, mip_count; + uint32_t alignments; + enum gen_sample_count sample_count; + enum gen_surface_type type; + uint32_t dw0, dw1, dw2, dw3, dw4, dw5, dw7; + + ILO_DEV_ASSERT(dev, 7, 8); + + if (!surface_validate_gen6_image(dev, info) || + !surface_get_gen6_image_extent(dev, info, &width, &height) || + !surface_get_gen6_image_slices(dev, info, &depth, &array_base, + &view_extent) || + !surface_get_gen6_image_levels(dev, info, &min_lod, &mip_count) || + !surface_get_gen6_image_sample_count(dev, info, &sample_count) || + !surface_get_gen6_image_alignments(dev, info, &alignments)) + return false; + + type = (info->is_cube_map) ? 
GEN6_SURFTYPE_CUBE : + get_gen6_surface_type(dev, info->img); + + dw0 = type << GEN7_SURFACE_DW0_TYPE__SHIFT | + info->format << GEN7_SURFACE_DW0_FORMAT__SHIFT | + alignments; + + if (info->is_array) + dw0 |= GEN7_SURFACE_DW0_IS_ARRAY; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + dw0 |= info->img->tiling << GEN8_SURFACE_DW0_TILING__SHIFT; + } else { + dw0 |= info->img->tiling << GEN7_SURFACE_DW0_TILING__SHIFT; + + if (info->img->walk == ILO_IMAGE_WALK_LOD) + dw0 |= GEN7_SURFACE_DW0_ARYSPC_LOD0; + else + dw0 |= GEN7_SURFACE_DW0_ARYSPC_FULL; + } + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 67: + * + * "For SURFTYPE_CUBE Surfaces accessed via the Sampling Engine: Bits + * 5:0 of this field (Cube Face Enables) enable the individual faces + * of a cube map. Enabling a face indicates that the face is present + * in the cube map, while disabling it indicates that that face is + * represented by the texture map's border color. Refer to Memory Data + * Formats for the correlation between faces and the cube map memory + * layout. Note that storage for disabled faces must be provided. For + * other surfaces this field is reserved and MBZ." + * + * "When TEXCOORDMODE_CLAMP is used when accessing a cube map, this + * field must be programmed to 111111b (all faces enabled). This field + * is ignored unless the Surface Type is SURFTYPE_CUBE." 
+ */ + if (info->is_cube_map && + info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) + dw0 |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; + + dw1 = 0; + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + assert(info->img->walk_layer_height % 4 == 0); + dw1 |= info->img->walk_layer_height / 4 << + GEN8_SURFACE_DW1_QPITCH__SHIFT; + } + + dw2 = height << GEN7_SURFACE_DW2_HEIGHT__SHIFT | + width << GEN7_SURFACE_DW2_WIDTH__SHIFT; + + dw3 = depth << GEN7_SURFACE_DW3_DEPTH__SHIFT | + (info->img->bo_stride - 1) << GEN7_SURFACE_DW3_PITCH__SHIFT; + + if (ilo_dev_gen(dev) == ILO_GEN(7.5)) + dw3 |= 0 << GEN75_SURFACE_DW3_INTEGER_SURFACE_FORMAT__SHIFT; + + dw4 = array_base << GEN7_SURFACE_DW4_MIN_ARRAY_ELEMENT__SHIFT | + view_extent << GEN7_SURFACE_DW4_RT_VIEW_EXTENT__SHIFT | + sample_count << GEN7_SURFACE_DW4_MULTISAMPLECOUNT__SHIFT; + + /* + * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL + * means the samples are interleaved. The layouts are the same when the + * number of samples is 1. 
+ */ + if (info->img->interleaved_samples && info->img->sample_count > 1) { + assert(info->access != ILO_STATE_SURFACE_ACCESS_DP_RENDER); + dw4 |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL; + } else { + dw4 |= GEN7_SURFACE_DW4_MSFMT_MSS; + } + + dw5 = min_lod << GEN7_SURFACE_DW5_MIN_LOD__SHIFT | + mip_count << GEN7_SURFACE_DW5_MIP_COUNT_LOD__SHIFT; + + dw7 = 0; + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + dw7 |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | + GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | + GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | + GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); + } + + STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 13); + surf->surface[0] = dw0; + surf->surface[1] = dw1; + surf->surface[2] = dw2; + surf->surface[3] = dw3; + surf->surface[4] = dw4; + surf->surface[5] = dw5; + surf->surface[6] = 0; + surf->surface[7] = dw7; + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + surf->surface[8] = 0; + surf->surface[9] = 0; + surf->surface[10] = 0; + surf->surface[11] = 0; + surf->surface[12] = 0; + } + + surf->type = type; + surf->min_lod = min_lod; + surf->mip_count = mip_count; + + return true; +} + +bool +ilo_state_surface_init_for_null(struct ilo_state_surface *surf, + const struct ilo_dev *dev) +{ + bool ret = true; + + assert(ilo_is_zeroed(surf, sizeof(*surf))); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + ret &= surface_set_gen7_null_SURFACE_STATE(surf, dev); + else + ret &= surface_set_gen6_null_SURFACE_STATE(surf, dev); + + surf->type = GEN6_SURFTYPE_NULL; + surf->readonly = true; + + assert(ret); + + return ret; +} + +bool +ilo_state_surface_init_for_buffer(struct ilo_state_surface *surf, + const struct ilo_dev *dev, + const struct ilo_state_surface_buffer_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(surf, sizeof(*surf))); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + ret &= surface_set_gen7_buffer_SURFACE_STATE(surf, dev, info); + else + ret &= surface_set_gen6_buffer_SURFACE_STATE(surf, dev, info); 
+ + surf->readonly = info->readonly; + + assert(ret); + + return ret; +} + +bool +ilo_state_surface_init_for_image(struct ilo_state_surface *surf, + const struct ilo_dev *dev, + const struct ilo_state_surface_image_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(surf, sizeof(*surf))); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + ret &= surface_set_gen7_image_SURFACE_STATE(surf, dev, info); + else + ret &= surface_set_gen6_image_SURFACE_STATE(surf, dev, info); + + surf->is_integer = info->is_integer; + surf->readonly = info->readonly; + surf->scanout = info->img->scanout; + + assert(ret); + + return ret; +} + +bool +ilo_state_surface_set_scs(struct ilo_state_surface *surf, + const struct ilo_dev *dev, + enum gen_surface_scs rgba[4]) +{ + const uint32_t scs = GEN_SHIFT32(rgba[0], GEN75_SURFACE_DW7_SCS_R) | + GEN_SHIFT32(rgba[1], GEN75_SURFACE_DW7_SCS_G) | + GEN_SHIFT32(rgba[2], GEN75_SURFACE_DW7_SCS_B) | + GEN_SHIFT32(rgba[3], GEN75_SURFACE_DW7_SCS_A); + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(ilo_dev_gen(dev) >= ILO_GEN(7.5)); + + surf->surface[7] = (surf->surface[7] & ~GEN75_SURFACE_DW7_SCS__MASK) | scs; + + return true; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.h b/src/gallium/drivers/ilo/core/ilo_state_surface.h new file mode 100644 index 00000000000..deb0b549ebd --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_surface.h @@ -0,0 +1,116 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_SURFACE_H +#define ILO_STATE_SURFACE_H + +#include "genhw/genhw.h" +#include "intel_winsys.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +struct ilo_buffer; +struct ilo_image; + +enum ilo_state_surface_access { + ILO_STATE_SURFACE_ACCESS_SAMPLER, /* sampling engine surfaces */ + ILO_STATE_SURFACE_ACCESS_DP_RENDER, /* render target surfaces */ + ILO_STATE_SURFACE_ACCESS_DP_TYPED, /* typed surfaces */ + ILO_STATE_SURFACE_ACCESS_DP_UNTYPED, /* untyped surfaces */ + ILO_STATE_SURFACE_ACCESS_DP_DATA, + ILO_STATE_SURFACE_ACCESS_DP_SVB, +}; + +struct ilo_state_surface_buffer_info { + const struct ilo_buffer *buf; + + enum ilo_state_surface_access access; + + enum gen_surface_format format; + uint8_t format_size; + + bool readonly; + uint16_t struct_size; + + uint32_t offset; + uint32_t size; +}; + +struct ilo_state_surface_image_info { + const struct ilo_image *img; + + enum ilo_state_surface_access access; + + enum gen_surface_format format; + bool is_integer; + + bool readonly; + bool is_cube_map; + bool is_array; + + uint8_t level_base; + uint8_t level_count; + uint16_t slice_base; + uint16_t slice_count; +}; + +struct ilo_state_surface { + uint32_t surface[13]; + + enum gen_surface_type type; + uint8_t min_lod; + uint8_t mip_count; + bool is_integer; + + bool readonly; + bool scanout; + + /* managed by users */ + struct intel_bo *bo; +}; + +bool +ilo_state_surface_init_for_null(struct ilo_state_surface *surf, + const struct ilo_dev *dev); + +bool +ilo_state_surface_init_for_buffer(struct ilo_state_surface *surf, + const struct ilo_dev *dev, + const struct ilo_state_surface_buffer_info *info); + +bool +ilo_state_surface_init_for_image(struct ilo_state_surface *surf, + const struct ilo_dev *dev, + const struct ilo_state_surface_image_info *info); + +bool +ilo_state_surface_set_scs(struct ilo_state_surface *surf, + const struct ilo_dev *dev, + enum gen_surface_scs rgba[4]); + +#endif /* ILO_STATE_SURFACE_H */ 
From 61fea171af64288bdf622e7ecf07e3ca42f83974 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 13 May 2015 13:10:54 +0800 Subject: [PATCH 610/834] ilo: add ilo_state_sampler We want to replace ilo_sampler_cso with ilo_state_sampler. --- src/gallium/drivers/ilo/Makefile.sources | 2 + .../drivers/ilo/core/ilo_state_sampler.c | 742 ++++++++++++++++++ .../drivers/ilo/core/ilo_state_sampler.h | 103 +++ 3 files changed, 847 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_sampler.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_sampler.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 587850eafdb..3bb3dde0e18 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -23,6 +23,8 @@ C_SOURCES := \ core/ilo_state_3d.h \ core/ilo_state_3d_bottom.c \ core/ilo_state_3d_top.c \ + core/ilo_state_sampler.c \ + core/ilo_state_sampler.h \ core/ilo_state_surface.c \ core/ilo_state_surface.h \ core/ilo_state_zs.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_sampler.c b/src/gallium/drivers/ilo/core/ilo_state_sampler.c new file mode 100644 index 00000000000..3787f684fe8 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_sampler.c @@ -0,0 +1,742 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "util/u_half.h" + +#include "ilo_debug.h" +#include "ilo_state_surface.h" +#include "ilo_state_sampler.h" + +static bool +sampler_validate_gen6_non_normalized(const struct ilo_dev *dev, + const struct ilo_state_sampler_info *info) +{ + const enum gen_texcoord_mode addr_ctrls[3] = { + info->tcx_ctrl, info->tcy_ctrl, info->tcz_ctrl, + }; + int i; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 98: + * + * "The following state must be set as indicated if this field + * (Non-normalized Coordinate Enable) is enabled: + * + * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP, + * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER. + * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D. + * - Mag Mode Filter must be MAPFILTER_NEAREST or + * MAPFILTER_LINEAR. + * - Min Mode Filter must be MAPFILTER_NEAREST or + * MAPFILTER_LINEAR. + * - Mip Mode Filter must be MIPFILTER_NONE. + * - Min LOD must be 0. + * - Max LOD must be 0. + * - MIP Count must be 0. + * - Surface Min LOD must be 0. + * - Texture LOD Bias must be 0." 
+ */ + for (i = 0; i < 3; i++) { + switch (addr_ctrls[i]) { + case GEN6_TEXCOORDMODE_CLAMP: + case GEN6_TEXCOORDMODE_CLAMP_BORDER: + case GEN8_TEXCOORDMODE_HALF_BORDER: + break; + default: + assert(!"bad non-normalized coordinate wrap mode"); + break; + } + } + + assert(info->mip_filter == GEN6_MIPFILTER_NONE); + + assert((info->min_filter == GEN6_MAPFILTER_NEAREST || + info->min_filter == GEN6_MAPFILTER_LINEAR) && + (info->mag_filter == GEN6_MAPFILTER_NEAREST || + info->mag_filter == GEN6_MAPFILTER_LINEAR)); + + assert(info->min_lod == 0.0f && + info->max_lod == 0.0f && + info->lod_bias == 0.0f); + + return true; +} + +static bool +sampler_validate_gen6_sampler(const struct ilo_dev *dev, + const struct ilo_state_sampler_info *info) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + if (info->non_normalized && + !sampler_validate_gen6_non_normalized(dev, info)) + return false; + + if (ilo_dev_gen(dev) < ILO_GEN(8)) { + assert(info->tcx_ctrl != GEN8_TEXCOORDMODE_HALF_BORDER && + info->tcy_ctrl != GEN8_TEXCOORDMODE_HALF_BORDER && + info->tcz_ctrl != GEN8_TEXCOORDMODE_HALF_BORDER); + } + + return true; +} + +static uint32_t +sampler_get_gen6_integer_filters(const struct ilo_dev *dev, + const struct ilo_state_sampler_info *info) +{ + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 103: + * + * "MIPFILTER_LINEAR is not supported for surface formats that do not + * support "Sampling Engine Filtering" as indicated in the Surface + * Formats table unless using the sample_c message type." + * + * "Only MAPFILTER_NEAREST is supported for surface formats that do not + * support "Sampling Engine Filtering" as indicated in the Surface + * Formats table unless using the sample_c message type. + */ + const enum gen_mip_filter mip_filter = + (info->mip_filter == GEN6_MIPFILTER_LINEAR) ? 
+ GEN6_MIPFILTER_NEAREST : info->mip_filter; + const enum gen_map_filter min_filter = GEN6_MAPFILTER_NEAREST; + const enum gen_map_filter mag_filter = GEN6_MAPFILTER_NEAREST; + + ILO_DEV_ASSERT(dev, 6, 8); + + return mip_filter << GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT | + mag_filter << GEN6_SAMPLER_DW0_MAG_FILTER__SHIFT | + min_filter << GEN6_SAMPLER_DW0_MIN_FILTER__SHIFT; +} + +static uint32_t +sampler_get_gen6_3d_filters(const struct ilo_dev *dev, + const struct ilo_state_sampler_info *info) +{ + const enum gen_mip_filter mip_filter = info->mip_filter; + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 103: + * + * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for + * surfaces of type SURFTYPE_3D." + */ + const enum gen_map_filter min_filter = + (info->min_filter == GEN6_MAPFILTER_NEAREST || + info->min_filter == GEN6_MAPFILTER_LINEAR) ? + info->min_filter : GEN6_MAPFILTER_LINEAR; + const enum gen_map_filter mag_filter = + (info->mag_filter == GEN6_MAPFILTER_NEAREST || + info->mag_filter == GEN6_MAPFILTER_LINEAR) ? 
+ info->mag_filter : GEN6_MAPFILTER_LINEAR; + + ILO_DEV_ASSERT(dev, 6, 8); + + return mip_filter << GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT | + mag_filter << GEN6_SAMPLER_DW0_MAG_FILTER__SHIFT | + min_filter << GEN6_SAMPLER_DW0_MIN_FILTER__SHIFT; +} + +static uint32_t +get_gen6_addr_controls(const struct ilo_dev *dev, + enum gen_texcoord_mode tcx_ctrl, + enum gen_texcoord_mode tcy_ctrl, + enum gen_texcoord_mode tcz_ctrl) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + return tcx_ctrl << GEN7_SAMPLER_DW3_U_WRAP__SHIFT | + tcy_ctrl << GEN7_SAMPLER_DW3_V_WRAP__SHIFT | + tcz_ctrl << GEN7_SAMPLER_DW3_R_WRAP__SHIFT; + } else { + return tcx_ctrl << GEN6_SAMPLER_DW1_U_WRAP__SHIFT | + tcy_ctrl << GEN6_SAMPLER_DW1_V_WRAP__SHIFT | + tcz_ctrl << GEN6_SAMPLER_DW1_R_WRAP__SHIFT; + } +} + +static uint32_t +sampler_get_gen6_1d_addr_controls(const struct ilo_dev *dev, + const struct ilo_state_sampler_info *info) +{ + const enum gen_texcoord_mode tcx_ctrl = + (info->tcx_ctrl == GEN6_TEXCOORDMODE_CUBE) ? + GEN6_TEXCOORDMODE_CLAMP : info->tcx_ctrl; + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 100: + * + * "If this field (TCY Address Control Mode) is set to + * TEXCOORDMODE_CLAMP_BORDER or TEXCOORDMODE_HALF_BORDER and a 1D + * surface is sampled, incorrect blending with the border color in the + * vertical direction may occur." + */ + const enum gen_texcoord_mode tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP; + const enum gen_texcoord_mode tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP; + + ILO_DEV_ASSERT(dev, 6, 8); + + return get_gen6_addr_controls(dev, tcx_ctrl, tcy_ctrl, tcz_ctrl); +} + +static uint32_t +sampler_get_gen6_2d_3d_addr_controls(const struct ilo_dev *dev, + const struct ilo_state_sampler_info *info) +{ + const enum gen_texcoord_mode tcx_ctrl = + (info->tcx_ctrl == GEN6_TEXCOORDMODE_CUBE) ? + GEN6_TEXCOORDMODE_CLAMP : info->tcx_ctrl; + const enum gen_texcoord_mode tcy_ctrl = + (info->tcy_ctrl == GEN6_TEXCOORDMODE_CUBE) ? 
+ GEN6_TEXCOORDMODE_CLAMP : info->tcy_ctrl; + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 108: + * + * "[DevSNB]: if this field (TCZ Address Control Mode) is set to + * TEXCOORDMODE_CLAMP_BORDER samples outside the map will clamp to 0 + * instead of boarder color" + * + * From the Ivy Bridge PRM, volume 4 part 1, page 100: + * + * "If this field is set to TEXCOORDMODE_CLAMP_BORDER for 3D maps on + * formats without an alpha channel, samples straddling the map in the + * Z direction may have their alpha channels off by 1." + * + * Do we want to do something here? + */ + const enum gen_texcoord_mode tcz_ctrl = + (info->tcz_ctrl == GEN6_TEXCOORDMODE_CUBE) ? + GEN6_TEXCOORDMODE_CLAMP : info->tcz_ctrl; + + ILO_DEV_ASSERT(dev, 6, 8); + + return get_gen6_addr_controls(dev, tcx_ctrl, tcy_ctrl, tcz_ctrl); +} + +static uint32_t +sampler_get_gen6_cube_addr_controls(const struct ilo_dev *dev, + const struct ilo_state_sampler_info *info) +{ + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 99: + * + * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP + * and TEXCOORDMODE_CUBE settings are valid, and each TC component + * must have the same Address Control mode. + * + * When TEXCOORDMODE_CUBE is not used accessing a cube map, the map's + * Cube Face Enable field must be programmed to 111111b (all faces + * enabled)." + * + * From the Haswell PRM, volume 2d, page 278: + * + * "When using cube map texture coordinates, each TC component must + * have the same Address Control Mode. + * + * When TEXCOORDMODE_CUBE is not used accessing a cube map, the map's + * Cube Face Enable field must be programmed to 111111b (all faces + * enabled)." + * + * We always enable all cube faces and only need to make sure all address + * control modes are the same. + */ + const enum gen_texcoord_mode tcx_ctrl = + (ilo_dev_gen(dev) >= ILO_GEN(7.5) || + info->tcx_ctrl == GEN6_TEXCOORDMODE_CUBE || + info->tcx_ctrl == GEN6_TEXCOORDMODE_CLAMP) ? 
+ info->tcx_ctrl : GEN6_TEXCOORDMODE_CLAMP; + const enum gen_texcoord_mode tcy_ctrl = tcx_ctrl; + const enum gen_texcoord_mode tcz_ctrl = tcx_ctrl; + + ILO_DEV_ASSERT(dev, 6, 8); + + return get_gen6_addr_controls(dev, tcx_ctrl, tcy_ctrl, tcz_ctrl); +} + +static uint16_t +get_gen6_lod_bias(const struct ilo_dev *dev, float bias) +{ + /* [-16.0, 16.0) in S4.6 or S4.8 */ + const int fbits = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 8 : 6; + const float max = 16.0f; + const float scale = (float) (1 << fbits); + const int mask = (1 << (1 + 4 + fbits)) - 1; + const int scaled_max = (16 << fbits) - 1; + int scaled; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (bias > max) + bias = max; + else if (bias < -max) + bias = -max; + + scaled = (int) (bias * scale); + if (scaled > scaled_max) + scaled = scaled_max; + + return (scaled & mask); +} + +static uint16_t +get_gen6_lod_clamp(const struct ilo_dev *dev, float clamp) +{ + /* [0.0, 13.0] in U4.6 or [0.0, 14.0] in U4.8 */ + const int fbits = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 8 : 6; + const float max = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 14.0f : 13.0f; + const float scale = (float) (1 << fbits); + + ILO_DEV_ASSERT(dev, 6, 8); + + if (clamp > max) + clamp = max; + else if (clamp < 0.0f) + clamp = 0.0f; + + return (int) (clamp * scale); +} + +static bool +sampler_set_gen6_SAMPLER_STATE(struct ilo_state_sampler *sampler, + const struct ilo_dev *dev, + const struct ilo_state_sampler_info *info) +{ + uint16_t lod_bias, max_lod, min_lod; + uint32_t dw0, dw1, dw3; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!sampler_validate_gen6_sampler(dev, info)) + return false; + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 15: + * + * "The per-pixel LOD is computed in an implementation-dependent manner + * and approximates the log2 of the texel/pixel ratio at the given + * pixel. The computation is typically based on the differential + * texel-space distances associated with a one-pixel differential + * distance along the screen x- and y-axes. 
These texel-space + * distances are computed by evaluating neighboring pixel texture + * coordinates, these coordinates being in units of texels on the base + * MIP level (multiplied by the corresponding surface size in + * texels)." + * + * Judging from the LOD computation pseudocode on page 16-18, the "base MIP + * level" should be given by SurfMinLod. To summarize, for the "sample" + * message, + * + * 1) LOD is set to log2(texel/pixel ratio). The number of texels is + * measured against level SurfMinLod. + * 2) Bias is added to LOD. + * 3) if pre-clamp is enabled, LOD is clamped to [MinLod, MaxLod] first + * 4) LOD is compared with Base to determine whether magnification or + * minification is needed. + * 5) If magnification is needed, or no mipmapping is requested, LOD is + * set to floor(MinLod). + * 6) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD. + * + * As an example, we could set SurfMinLod to GL_TEXTURE_BASE_LEVEL and Base + * to 0 to match GL. But GL expects LOD to be set to 0, instead of + * floor(MinLod), in 5). Since this is only an issue when MinLod is + * greater than or equal to one, and, with Base being 0, a non-zero MinLod + * implies minification, we only need to deal with the case when mipmapping + * is disabled. 
We can thus do: + * + * if (MipFilter == MIPFILTER_NONE && MinLod) { + * MinLod = 0; + * MagFilter = MinFilter; + * } + */ + + lod_bias = get_gen6_lod_bias(dev, info->lod_bias); + min_lod = get_gen6_lod_clamp(dev, info->min_lod); + max_lod = get_gen6_lod_clamp(dev, info->max_lod); + + dw0 = GEN6_SAMPLER_DW0_LOD_PRECLAMP_ENABLE | + 0 << GEN6_SAMPLER_DW0_BASE_LOD__SHIFT | + info->mip_filter << GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT | + info->mag_filter << GEN6_SAMPLER_DW0_MAG_FILTER__SHIFT | + info->min_filter << GEN6_SAMPLER_DW0_MIN_FILTER__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + dw0 |= GEN7_SAMPLER_DW0_BORDER_COLOR_MODE_DX10_OGL | + lod_bias << GEN7_SAMPLER_DW0_LOD_BIAS__SHIFT; + + if (info->min_filter == GEN6_MAPFILTER_ANISOTROPIC || + info->mag_filter == GEN6_MAPFILTER_ANISOTROPIC) + dw0 |= GEN7_SAMPLER_DW0_ANISO_ALGO_EWA; + } else { + dw0 |= lod_bias << GEN6_SAMPLER_DW0_LOD_BIAS__SHIFT | + info->shadow_func << GEN6_SAMPLER_DW0_SHADOW_FUNC__SHIFT; + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 102: + * + * "(Min and Mag State Not Equal) Must be set to 1 if any of the + * following are true: + * + * - Mag Mode Filter and Min Mode Filter are not the same + * - Address Rounding Enable: U address mag filter and U address + * min filter are not the same + * - Address Rounding Enable: V address mag filter and V address + * min filter are not the same + * - Address Rounding Enable: R address mag filter and R address + * min filter are not the same" + * + * We set address rounding for U, V, and R uniformly. Only need to + * check the filters. 
+ */ + if (info->min_filter != info->mag_filter) + dw0 |= GEN6_SAMPLER_DW0_MIN_MAG_NOT_EQUAL; + } + + dw1 = 0; + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 96: + * + * "This field (Cube Surface Control Mode) must be set to + * CUBECTRLMODE_PROGRAMMED" + */ + dw1 |= min_lod << GEN7_SAMPLER_DW1_MIN_LOD__SHIFT | + max_lod << GEN7_SAMPLER_DW1_MAX_LOD__SHIFT | + info->shadow_func << GEN7_SAMPLER_DW1_SHADOW_FUNC__SHIFT | + GEN7_SAMPLER_DW1_CUBECTRLMODE_PROGRAMMED; + } else { + dw1 |= min_lod << GEN6_SAMPLER_DW1_MIN_LOD__SHIFT | + max_lod << GEN6_SAMPLER_DW1_MAX_LOD__SHIFT | + GEN6_SAMPLER_DW1_CUBECTRLMODE_PROGRAMMED | + info->tcx_ctrl << GEN6_SAMPLER_DW1_U_WRAP__SHIFT | + info->tcy_ctrl << GEN6_SAMPLER_DW1_V_WRAP__SHIFT | + info->tcz_ctrl << GEN6_SAMPLER_DW1_R_WRAP__SHIFT; + } + + dw3 = info->max_anisotropy << GEN6_SAMPLER_DW3_MAX_ANISO__SHIFT; + + /* round the coordinates for linear filtering */ + if (info->min_filter != GEN6_MAPFILTER_NEAREST) { + dw3 |= GEN6_SAMPLER_DW3_U_MIN_ROUND | + GEN6_SAMPLER_DW3_V_MIN_ROUND | + GEN6_SAMPLER_DW3_R_MIN_ROUND; + } + if (info->mag_filter != GEN6_MAPFILTER_NEAREST) { + dw3 |= GEN6_SAMPLER_DW3_U_MAG_ROUND | + GEN6_SAMPLER_DW3_V_MAG_ROUND | + GEN6_SAMPLER_DW3_R_MAG_ROUND; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + dw3 |= GEN7_SAMPLER_DW3_TRIQUAL_FULL | + info->tcx_ctrl << GEN7_SAMPLER_DW3_U_WRAP__SHIFT | + info->tcy_ctrl << GEN7_SAMPLER_DW3_V_WRAP__SHIFT | + info->tcz_ctrl << GEN7_SAMPLER_DW3_R_WRAP__SHIFT; + + if (info->non_normalized) + dw3 |= GEN7_SAMPLER_DW3_NON_NORMALIZED_COORD; + } else { + if (info->non_normalized) + dw3 |= GEN6_SAMPLER_DW3_NON_NORMALIZED_COORD; + } + + STATIC_ASSERT(ARRAY_SIZE(sampler->sampler) >= 3); + sampler->sampler[0] = dw0; + sampler->sampler[1] = dw1; + sampler->sampler[2] = dw3; + + sampler->filter_integer = sampler_get_gen6_integer_filters(dev, info); + sampler->filter_3d = sampler_get_gen6_3d_filters(dev, info); + sampler->addr_ctrl_1d = 
sampler_get_gen6_1d_addr_controls(dev, info); + sampler->addr_ctrl_2d_3d = sampler_get_gen6_2d_3d_addr_controls(dev, info); + sampler->addr_ctrl_cube = sampler_get_gen6_cube_addr_controls(dev, info); + + sampler->non_normalized = info->non_normalized; + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 21: + * + * "[DevSNB] Errata: Incorrect behavior is observed in cases where the + * min and mag mode filters are different and SurfMinLOD is nonzero. + * The determination of MagMode uses the following equation instead of + * the one in the above pseudocode: + * + * MagMode = (LOD + SurfMinLOD - Base <= 0)" + * + * As a way to work around that, request Base to be set to SurfMinLod. + */ + if (ilo_dev_gen(dev) == ILO_GEN(6) && + info->min_filter != info->mag_filter) + sampler->base_to_surf_min_lod = true; + + return true; +} + +static bool +sampler_border_set_gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_state_sampler_border *border, + const struct ilo_dev *dev, + const struct ilo_state_sampler_border_info *info) +{ + uint32_t dw[12]; + float rgba[4]; + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 117: + * + * "For ([DevSNB]), if border color is used, all formats must be + * provided. Hardware will choose the appropriate format based on + * Surface Format and Texture Border Color Mode. The values + * represented by each format should be the same (other than being + * subject to range-based clamping and precision) to avoid unexpected + * behavior." + * + * XXX We do not honor info->is_integer yet. 
+ */ + + ILO_DEV_ASSERT(dev, 6, 6); + + /* make a copy so that we can clamp for SNORM and UNORM */ + memcpy(rgba, info->rgba.f, sizeof(rgba)); + + /* IEEE_FP */ + dw[1] = fui(rgba[0]); + dw[2] = fui(rgba[1]); + dw[3] = fui(rgba[2]); + dw[4] = fui(rgba[3]); + + /* FLOAT_16 */ + dw[5] = util_float_to_half(rgba[0]) | + util_float_to_half(rgba[1]) << 16; + dw[6] = util_float_to_half(rgba[2]) | + util_float_to_half(rgba[3]) << 16; + + /* clamp to [-1.0f, 1.0f] */ + rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f); + rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f); + rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f); + rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f); + + /* SNORM16 */ + dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) | + (int16_t) util_iround(rgba[1] * 32767.0f) << 16; + dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) | + (int16_t) util_iround(rgba[3] * 32767.0f) << 16; + + /* SNORM8 */ + dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) | + (int8_t) util_iround(rgba[1] * 127.0f) << 8 | + (int8_t) util_iround(rgba[2] * 127.0f) << 16 | + (int8_t) util_iround(rgba[3] * 127.0f) << 24; + + /* clamp to [0.0f, 1.0f] */ + rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f); + rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f); + rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f); + rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f); + + /* UNORM8 */ + dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) | + (uint8_t) util_iround(rgba[1] * 255.0f) << 8 | + (uint8_t) util_iround(rgba[2] * 255.0f) << 16 | + (uint8_t) util_iround(rgba[3] * 255.0f) << 24; + + /* UNORM16 */ + dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) | + (uint16_t) util_iround(rgba[1] * 65535.0f) << 16; + dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) | + (uint16_t) util_iround(rgba[3] * 65535.0f) << 16; + + STATIC_ASSERT(ARRAY_SIZE(border->color) >= 12); + memcpy(border->color, dw, sizeof(dw)); + + return true; +} + +static bool +sampler_border_set_gen7_SAMPLER_BORDER_COLOR_STATE(struct ilo_state_sampler_border *border, + const struct ilo_dev *dev, + const struct 
ilo_state_sampler_border_info *info) +{ + ILO_DEV_ASSERT(dev, 7, 8); + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 116: + * + * "In DX10/OGL mode, the format of the border color is + * R32G32B32A32_FLOAT, regardless of the surface format chosen." + * + * From the Haswell PRM, volume 2d, page 240: + * + * "So, SW will have to program the table in SAMPLER_BORDER_COLOR_STATE + * at offsets DWORD16 to 19, as per the integer surface format type." + * + * From the Broadwell PRM, volume 2d, page 297: + * + * "DX10/OGL mode: the format of the border color depends on the format + * of the surface being sampled. If the map format is UINT, then the + * border color format is R32G32B32A32_UINT. If the map format is + * SINT, then the border color format is R32G32B32A32_SINT. Otherwise, + * the border color format is R32G32B32A32_FLOAT." + * + * XXX every Gen is different + */ + + STATIC_ASSERT(ARRAY_SIZE(border->color) >= 4); + memcpy(border->color, info->rgba.f, sizeof(info->rgba.f)); + + return true; +} + +bool +ilo_state_sampler_init(struct ilo_state_sampler *sampler, + const struct ilo_dev *dev, + const struct ilo_state_sampler_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(sampler, sizeof(*sampler))); + + ret &= sampler_set_gen6_SAMPLER_STATE(sampler, dev, info); + + assert(ret); + + return ret; +} + +bool +ilo_state_sampler_init_disabled(struct ilo_state_sampler *sampler, + const struct ilo_dev *dev) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + assert(ilo_is_zeroed(sampler, sizeof(*sampler))); + + sampler->sampler[0] = GEN6_SAMPLER_DW0_DISABLE; + sampler->sampler[1] = 0; + sampler->sampler[2] = 0; + + return true; +} + +/** + * Modify \p sampler to work with \p surf. There will be loss of information. + * Callers should make a copy of the original sampler first. 
+ */ +bool +ilo_state_sampler_set_surface(struct ilo_state_sampler *sampler, + const struct ilo_dev *dev, + const struct ilo_state_surface *surf) +{ + uint32_t addr_ctrl; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (sampler->non_normalized) { + /* see sampler_validate_gen6_non_normalized() */ + assert(surf->type == GEN6_SURFTYPE_2D || + surf->type == GEN6_SURFTYPE_3D); + assert(!surf->min_lod && !surf->mip_count); + } + + if (sampler->base_to_surf_min_lod) { + const uint8_t base = surf->min_lod << GEN6_SAMPLER_DW0_BASE_LOD__RADIX; + + sampler->sampler[0] = + (sampler->sampler[0] & ~GEN6_SAMPLER_DW0_BASE_LOD__MASK) | + base << GEN6_SAMPLER_DW0_BASE_LOD__SHIFT; + } + + if (surf->is_integer || surf->type == GEN6_SURFTYPE_3D) { + const uint32_t mask = (GEN6_SAMPLER_DW0_MIP_FILTER__MASK | + GEN6_SAMPLER_DW0_MIN_FILTER__MASK | + GEN6_SAMPLER_DW0_MAG_FILTER__MASK); + const uint32_t filter = (surf->is_integer) ? + sampler->filter_integer : sampler->filter_3d; + + assert((filter & mask) == filter); + sampler->sampler[0] = (sampler->sampler[0] & ~mask) | + filter; + } + + switch (surf->type) { + case GEN6_SURFTYPE_1D: + addr_ctrl = sampler->addr_ctrl_1d; + break; + case GEN6_SURFTYPE_2D: + case GEN6_SURFTYPE_3D: + addr_ctrl = sampler->addr_ctrl_2d_3d; + break; + case GEN6_SURFTYPE_CUBE: + addr_ctrl = sampler->addr_ctrl_cube; + break; + default: + assert(!"unexpected surface type"); + addr_ctrl = 0; + break; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + const uint32_t mask = (GEN7_SAMPLER_DW3_U_WRAP__MASK | + GEN7_SAMPLER_DW3_V_WRAP__MASK | + GEN7_SAMPLER_DW3_R_WRAP__MASK); + + assert((addr_ctrl & mask) == addr_ctrl); + sampler->sampler[2] = (sampler->sampler[2] & ~mask) | + addr_ctrl; + } else { + const uint32_t mask = (GEN6_SAMPLER_DW1_U_WRAP__MASK | + GEN6_SAMPLER_DW1_V_WRAP__MASK | + GEN6_SAMPLER_DW1_R_WRAP__MASK); + + assert((addr_ctrl & mask) == addr_ctrl); + sampler->sampler[1] = (sampler->sampler[1] & ~mask) | + addr_ctrl; + } + + return true; +} + +bool 
+ilo_state_sampler_border_init(struct ilo_state_sampler_border *border, + const struct ilo_dev *dev, + const struct ilo_state_sampler_border_info *info) +{ + bool ret = true; + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + ret &= sampler_border_set_gen7_SAMPLER_BORDER_COLOR_STATE(border, + dev, info); + } else { + ret &= sampler_border_set_gen6_SAMPLER_BORDER_COLOR_STATE(border, + dev, info); + } + + assert(ret); + + return ret; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_sampler.h b/src/gallium/drivers/ilo/core/ilo_state_sampler.h new file mode 100644 index 00000000000..75c7620a678 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_sampler.h @@ -0,0 +1,103 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_SAMPLER_H +#define ILO_STATE_SAMPLER_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +struct ilo_state_surface; + +struct ilo_state_sampler_info { + bool non_normalized; + + float lod_bias; + float min_lod; + float max_lod; + + enum gen_mip_filter mip_filter; + enum gen_map_filter min_filter; + enum gen_map_filter mag_filter; + enum gen_aniso_ratio max_anisotropy; + + enum gen_texcoord_mode tcx_ctrl; + enum gen_texcoord_mode tcy_ctrl; + enum gen_texcoord_mode tcz_ctrl; + + enum gen_prefilter_op shadow_func; +}; + +struct ilo_state_sampler_border_info { + union { + float f[4]; + uint32_t ui[4]; + } rgba; + + bool is_integer; +}; + +struct ilo_state_sampler { + uint32_t sampler[3]; + + uint32_t filter_integer; + uint32_t filter_3d; + + uint32_t addr_ctrl_1d; + uint32_t addr_ctrl_2d_3d; + uint32_t addr_ctrl_cube; + + bool non_normalized; + bool base_to_surf_min_lod; +}; + +struct ilo_state_sampler_border { + uint32_t color[12]; +}; + +bool +ilo_state_sampler_init(struct ilo_state_sampler *sampler, + const struct ilo_dev *dev, + const struct ilo_state_sampler_info *info); + +bool +ilo_state_sampler_init_disabled(struct ilo_state_sampler *sampler, + const struct ilo_dev *dev); + +bool +ilo_state_sampler_set_surface(struct ilo_state_sampler *sampler, + const struct ilo_dev *dev, + const struct ilo_state_surface *surf); + +bool +ilo_state_sampler_border_init(struct ilo_state_sampler_border *border, + const struct ilo_dev *dev, + const struct ilo_state_sampler_border_info *info); + +#endif /* ILO_STATE_SAMPLER_H */ From 4fa7ed99a1e9334d96c1efd42344774dae19f466 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Tue, 12 May 2015 23:43:50 +0800 Subject: [PATCH 611/834] ilo: add ilo_state_viewport We want to replace ilo_viewport_cso and ilo_scissor_state with ilo_state_viewport. 
--- src/gallium/drivers/ilo/Makefile.sources | 2 + .../drivers/ilo/core/ilo_state_viewport.c | 378 ++++++++++++++++++ .../drivers/ilo/core/ilo_state_viewport.h | 132 ++++++ 3 files changed, 512 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_viewport.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_viewport.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 3bb3dde0e18..476a9ffe361 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -27,6 +27,8 @@ C_SOURCES := \ core/ilo_state_sampler.h \ core/ilo_state_surface.c \ core/ilo_state_surface.h \ + core/ilo_state_viewport.c \ + core/ilo_state_viewport.h \ core/ilo_state_zs.c \ core/ilo_state_zs.h \ core/intel_winsys.h \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_viewport.c b/src/gallium/drivers/ilo/core/ilo_state_viewport.c new file mode 100644 index 00000000000..aae57334541 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_viewport.c @@ -0,0 +1,378 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_viewport.h" + +static void +viewport_matrix_get_gen6_guardband(const struct ilo_dev *dev, + const struct ilo_state_viewport_matrix_info *mat, + float *min_gbx, float *max_gbx, + float *min_gby, float *max_gby) +{ + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 234: + * + * "Per-Device Guardband Extents + * + * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1] + * - Maximum Post-Clamp Delta (X or Y): 16K" + * + * "In addition, in order to be correctly rendered, objects must have a + * screenspace bounding box not exceeding 8K in the X or Y direction. + * This additional restriction must also be comprehended by software, + * i.e., enforced by use of clipping." + * + * From the Ivy Bridge PRM, volume 2 part 1, page 248: + * + * "Per-Device Guardband Extents + * + * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1] + * - Maximum Post-Clamp Delta (X or Y): N/A" + * + * "In addition, in order to be correctly rendered, objects must have a + * screenspace bounding box not exceeding 8K in the X or Y direction. + * This additional restriction must also be comprehended by software, + * i.e., enforced by use of clipping." + * + * Combined, the bounding box of any object can not exceed 8K in both + * width and height. + * + * Below we set the guardband as a square of length 8K, centered at where + * the viewport is. This makes sure all objects passing the GB test are + * valid to the renderer, and those failing the XY clipping have a + * better chance of passing the GB test. + */ + const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 
32768 : 16384; + const int half_len = 8192 / 2; + int center_x = (int) mat->translate[0]; + int center_y = (int) mat->translate[1]; + float scale_x, scale_y; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* make sure the guardband is within the valid range */ + if (center_x - half_len < -max_extent) + center_x = -max_extent + half_len; + else if (center_x + half_len > max_extent - 1) + center_x = max_extent - half_len; + + if (center_y - half_len < -max_extent) + center_y = -max_extent + half_len; + else if (center_y + half_len > max_extent - 1) + center_y = max_extent - half_len; + + scale_x = fabsf(mat->scale[0]); + scale_y = fabsf(mat->scale[1]); + /* + * From the Haswell PRM, volume 2d, page 292-293: + * + * "Note: Minimum allowed value for this field (X/Y Min Clip Guardband) + * is -16384." + * + * "Note: Maximum allowed value for this field (X/Y Max Clip Guardband) + * is 16383." + * + * Avoid small scales. + */ + if (scale_x < 1.0f) + scale_x = 1.0f; + if (scale_y < 1.0f) + scale_y = 1.0f; + + /* in NDC space */ + *min_gbx = ((float) (center_x - half_len) - mat->translate[0]) / scale_x; + *max_gbx = ((float) (center_x + half_len) - mat->translate[0]) / scale_x; + *min_gby = ((float) (center_y - half_len) - mat->translate[1]) / scale_y; + *max_gby = ((float) (center_y + half_len) - mat->translate[1]) / scale_y; +} + +static void +viewport_matrix_get_extent(const struct ilo_state_viewport_matrix_info *mat, + int axis, float *min, float *max) +{ + const float scale_abs = fabsf(mat->scale[axis]); + + *min = -1.0f * scale_abs + mat->translate[axis]; + *max = 1.0f * scale_abs + mat->translate[axis]; +} + +static bool +viewport_matrix_set_gen7_SF_CLIP_VIEWPORT(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + const struct ilo_state_viewport_matrix_info *matrices, + uint8_t count) +{ + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + for (i = 0; i < count; i++) { + const struct ilo_state_viewport_matrix_info *mat = &matrices[i]; + float min_gbx, max_gbx, min_gby, 
max_gby; + uint32_t dw[16]; + + viewport_matrix_get_gen6_guardband(dev, mat, + &min_gbx, &max_gbx, &min_gby, &max_gby); + + dw[0] = fui(mat->scale[0]); + dw[1] = fui(mat->scale[1]); + dw[2] = fui(mat->scale[2]); + dw[3] = fui(mat->translate[0]); + dw[4] = fui(mat->translate[1]); + dw[5] = fui(mat->translate[2]); + dw[6] = 0; + dw[7] = 0; + + dw[8] = fui(min_gbx); + dw[9] = fui(max_gbx); + dw[10] = fui(min_gby); + dw[11] = fui(max_gby); + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + float min_x, max_x, min_y, max_y; + + viewport_matrix_get_extent(mat, 0, &min_x, &max_x); + viewport_matrix_get_extent(mat, 1, &min_y, &max_y); + + dw[12] = fui(min_x); + dw[13] = fui(max_x - 1.0f); + dw[14] = fui(min_y); + dw[15] = fui(max_y - 1.0f); + } else { + dw[12] = 0; + dw[13] = 0; + dw[14] = 0; + dw[15] = 0; + } + + STATIC_ASSERT(ARRAY_SIZE(vp->sf_clip[i]) >= 16); + memcpy(vp->sf_clip[i], dw, sizeof(dw)); + } + + return true; +} + +static bool +viewport_matrix_set_gen6_CC_VIEWPORT(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + const struct ilo_state_viewport_matrix_info *matrices, + uint8_t count) +{ + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + for (i = 0; i < count; i++) { + const struct ilo_state_viewport_matrix_info *mat = &matrices[i]; + float min_z, max_z; + + viewport_matrix_get_extent(mat, 2, &min_z, &max_z); + + STATIC_ASSERT(ARRAY_SIZE(vp->cc[i]) >= 2); + vp->cc[i][0] = fui(min_z); + vp->cc[i][1] = fui(max_z); + } + + return true; +} + +static bool +viewport_scissor_set_gen6_SCISSOR_RECT(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + const struct ilo_state_viewport_scissor_info *scissors, + uint8_t count) +{ + const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192; + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + for (i = 0; i < count; i++) { + const struct ilo_state_viewport_scissor_info *scissor = &scissors[i]; + uint16_t min_x, min_y, max_x, max_y; + uint32_t dw0, dw1; + + min_x = (scissor->min_x < max_size) ? 
scissor->min_x : max_size - 1; + min_y = (scissor->min_y < max_size) ? scissor->min_y : max_size - 1; + max_x = (scissor->max_x < max_size) ? scissor->max_x : max_size - 1; + max_y = (scissor->max_y < max_size) ? scissor->max_y : max_size - 1; + + dw0 = min_y << GEN6_SCISSOR_DW0_MIN_Y__SHIFT | + min_x << GEN6_SCISSOR_DW0_MIN_X__SHIFT; + dw1 = max_y << GEN6_SCISSOR_DW1_MAX_Y__SHIFT | + max_x << GEN6_SCISSOR_DW1_MAX_X__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(vp->scissor[i]) >= 2); + vp->scissor[i][0] = dw0; + vp->scissor[i][1] = dw1; + } + + return true; +} + +bool +ilo_state_viewport_init(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + const struct ilo_state_viewport_info *info) +{ + const size_t elem_size = ilo_state_viewport_data_size(dev, 1); + + assert(ilo_is_zeroed(vp, sizeof(*vp))); + assert(ilo_is_zeroed(info->data, info->data_size)); + + vp->data = info->data; + + if (info->data_size / elem_size < ILO_STATE_VIEWPORT_MAX_COUNT) + vp->array_size = info->data_size / elem_size; + else + vp->array_size = ILO_STATE_VIEWPORT_MAX_COUNT; + + return ilo_state_viewport_set_params(vp, dev, &info->params, false); +} + +bool +ilo_state_viewport_init_data_only(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + void *data, size_t data_size) +{ + struct ilo_state_viewport_info info; + + memset(&info, 0, sizeof(info)); + info.data = data; + info.data_size = data_size; + + return ilo_state_viewport_init(vp, dev, &info); +} + +bool +ilo_state_viewport_init_for_rectlist(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + void *data, size_t data_size) +{ + struct ilo_state_viewport_info info; + struct ilo_state_viewport_matrix_info mat; + struct ilo_state_viewport_scissor_info sci; + + memset(&info, 0, sizeof(info)); + memset(&mat, 0, sizeof(mat)); + memset(&sci, 0, sizeof(sci)); + + info.data = data; + info.data_size = data_size; + info.params.matrices = &mat; + info.params.scissors = &sci; + info.params.count = 1; + + mat.scale[0] = 1.0f; + 
mat.scale[1] = 1.0f; + mat.scale[2] = 1.0f; + + return ilo_state_viewport_init(vp, dev, &info); +} + +static void +viewport_set_count(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + uint8_t count) +{ + assert(count <= vp->array_size); + + vp->count = count; + vp->sf_clip = (uint32_t (*)[16]) vp->data; + vp->cc = (uint32_t (*)[ 2]) (vp->sf_clip + count); + vp->scissor = (uint32_t (*)[ 2]) (vp->cc + count); +} + +bool +ilo_state_viewport_set_params(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + const struct ilo_state_viewport_params_info *params, + bool scissors_only) +{ + bool ret = true; + + if (scissors_only) { + assert(vp->count == params->count); + + ret &= viewport_scissor_set_gen6_SCISSOR_RECT(vp, dev, + params->scissors, params->count); + } else { + viewport_set_count(vp, dev, params->count); + + ret &= viewport_matrix_set_gen7_SF_CLIP_VIEWPORT(vp, dev, + params->matrices, params->count); + ret &= viewport_matrix_set_gen6_CC_VIEWPORT(vp, dev, + params->matrices, params->count); + ret &= viewport_scissor_set_gen6_SCISSOR_RECT(vp, dev, + params->scissors, params->count); + } + + assert(ret); + + return ret; +} + +void +ilo_state_viewport_full_delta(const struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + struct ilo_state_viewport_delta *delta) +{ + delta->dirty = ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT | + ILO_STATE_VIEWPORT_CC_VIEWPORT | + ILO_STATE_VIEWPORT_SCISSOR_RECT; +} + +void +ilo_state_viewport_get_delta(const struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + const struct ilo_state_viewport *old, + struct ilo_state_viewport_delta *delta) +{ + const size_t sf_clip_size = sizeof(vp->sf_clip[0]) * vp->count; + const size_t cc_size = sizeof(vp->cc[0]) * vp->count; + const size_t scissor_size = sizeof(vp->scissor[0]) * vp->count; + + /* no shallow copying */ + assert(vp->data != old->data); + + if (vp->count != old->count) { + ilo_state_viewport_full_delta(vp, dev, delta); + return; + } + + delta->dirty = 0; + 
+ if (memcmp(vp->sf_clip, old->sf_clip, sf_clip_size)) + delta->dirty |= ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT; + + if (memcmp(vp->cc, old->cc, cc_size)) + delta->dirty |= ILO_STATE_VIEWPORT_CC_VIEWPORT; + + if (memcmp(vp->scissor, old->scissor, scissor_size)) + delta->dirty |= ILO_STATE_VIEWPORT_SCISSOR_RECT; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_viewport.h b/src/gallium/drivers/ilo/core/ilo_state_viewport.h new file mode 100644 index 00000000000..b42ad6571da --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_viewport.h @@ -0,0 +1,132 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_VIEWPORT_H +#define ILO_STATE_VIEWPORT_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +/* + * From the Sandy Bridge PRM, volume 2 part 1, page 38: + * + * "... 
16 sets of viewport (VP) state parameters in the Clip unit's + * VertexClipTest function and in the SF unit's ViewportMapping and + * Scissor functions." + */ +#define ILO_STATE_VIEWPORT_MAX_COUNT 16 + +enum ilo_state_viewport_dirty_bits { + ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT = (1 << 0), + ILO_STATE_VIEWPORT_CC_VIEWPORT = (1 << 1), + ILO_STATE_VIEWPORT_SCISSOR_RECT = (1 << 2), +}; + +struct ilo_state_viewport_matrix_info { + float scale[3]; + float translate[3]; +}; + +struct ilo_state_viewport_scissor_info { + /* all inclusive */ + uint16_t min_x; + uint16_t min_y; + uint16_t max_x; + uint16_t max_y; +}; + +struct ilo_state_viewport_params_info { + const struct ilo_state_viewport_matrix_info *matrices; + const struct ilo_state_viewport_scissor_info *scissors; + uint8_t count; +}; + +struct ilo_state_viewport_info { + void *data; + size_t data_size; + + struct ilo_state_viewport_params_info params; +}; + +struct ilo_state_viewport { + void *data; + uint8_t array_size; + + uint8_t count; + uint32_t (*sf_clip)[16]; + uint32_t (*cc)[2]; + uint32_t (*scissor)[2]; +}; + +struct ilo_state_viewport_delta { + uint32_t dirty; +}; + +static inline size_t +ilo_state_viewport_data_size(const struct ilo_dev *dev, uint8_t array_size) +{ + const struct ilo_state_viewport *vp = NULL; + return (sizeof(vp->sf_clip[0]) + + sizeof(vp->cc[0]) + + sizeof(vp->scissor[0])) * array_size; +} + +bool +ilo_state_viewport_init(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + const struct ilo_state_viewport_info *info); + +bool +ilo_state_viewport_init_data_only(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + void *data, size_t data_size); + +bool +ilo_state_viewport_init_for_rectlist(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + void *data, size_t data_size); + +bool +ilo_state_viewport_set_params(struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + const struct ilo_state_viewport_params_info *params, + bool scissors_only); + +void 
+ilo_state_viewport_full_delta(const struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + struct ilo_state_viewport_delta *delta); + +void +ilo_state_viewport_get_delta(const struct ilo_state_viewport *vp, + const struct ilo_dev *dev, + const struct ilo_state_viewport *old, + struct ilo_state_viewport_delta *delta); + +#endif /* ILO_STATE_VIEWPORT_H */ From 6be8b6053de356a679707a0de92b083a4ea83937 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sun, 10 May 2015 13:52:21 +0800 Subject: [PATCH 612/834] ilo: add ilo_state_raster We want to replace ilo_rasterizer_state with ilo_state_raster. --- src/gallium/drivers/ilo/Makefile.sources | 2 + .../drivers/ilo/core/ilo_state_raster.c | 1028 +++++++++++++++++ .../drivers/ilo/core/ilo_state_raster.h | 232 ++++ 3 files changed, 1262 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_raster.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_raster.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 476a9ffe361..b4b4498a024 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -23,6 +23,8 @@ C_SOURCES := \ core/ilo_state_3d.h \ core/ilo_state_3d_bottom.c \ core/ilo_state_3d_top.c \ + core/ilo_state_raster.c \ + core/ilo_state_raster.h \ core/ilo_state_sampler.c \ core/ilo_state_sampler.h \ core/ilo_state_surface.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_raster.c b/src/gallium/drivers/ilo/core/ilo_state_raster.c new file mode 100644 index 00000000000..2b7567e3111 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_raster.c @@ -0,0 +1,1028 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_raster.h" + +static bool +raster_validate_gen6_clip(const struct ilo_dev *dev, + const struct ilo_state_raster_info *info) +{ + const struct ilo_state_raster_clip_info *clip = &info->clip; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(clip->viewport_count); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 188: + * + * ""Clip Distance Cull Test Enable Bitmask" and "Clip Distance Clip + * Test Enable Bitmask" should not have overlapping bits in the mask, + * else the results are undefined." 
+ */ + assert(!(clip->user_cull_enables & clip->user_clip_enables)); + + if (ilo_dev_gen(dev) < ILO_GEN(9)) + assert(clip->z_near_enable == clip->z_far_enable); + + return true; +} + +static bool +raster_set_gen6_3DSTATE_CLIP(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info) +{ + const struct ilo_state_raster_clip_info *clip = &info->clip; + const struct ilo_state_raster_setup_info *setup = &info->setup; + const struct ilo_state_raster_tri_info *tri = &info->tri; + const struct ilo_state_raster_scan_info *scan = &info->scan; + uint32_t dw1, dw2, dw3; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!raster_validate_gen6_clip(dev, info)) + return false; + + dw1 = clip->user_cull_enables << GEN6_CLIP_DW1_UCP_CULL_ENABLES__SHIFT; + + if (clip->stats_enable) + dw1 |= GEN6_CLIP_DW1_STATISTICS; + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 219: + * + * "Workaround : Due to Hardware issue "EarlyCull" needs to be + * enabled only for the cases where the incoming primitive topology + * into the clipper guaranteed to be Trilist." + * + * What does this mean? 
+ */ + dw1 |= GEN7_CLIP_DW1_SUBPIXEL_8BITS | + GEN7_CLIP_DW1_EARLY_CULL_ENABLE; + + if (ilo_dev_gen(dev) <= ILO_GEN(7.5)) { + dw1 |= tri->front_winding << GEN7_CLIP_DW1_FRONT_WINDING__SHIFT | + tri->cull_mode << GEN7_CLIP_DW1_CULL_MODE__SHIFT; + } + } + + dw2 = clip->user_clip_enables << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT | + GEN6_CLIPMODE_NORMAL << GEN6_CLIP_DW2_CLIP_MODE__SHIFT; + + if (clip->clip_enable) + dw2 |= GEN6_CLIP_DW2_CLIP_ENABLE; + + if (clip->z_near_zero) + dw2 |= GEN6_CLIP_DW2_APIMODE_D3D; + else + dw2 |= GEN6_CLIP_DW2_APIMODE_OGL; + + if (clip->xy_test_enable) + dw2 |= GEN6_CLIP_DW2_XY_TEST_ENABLE; + + if (ilo_dev_gen(dev) < ILO_GEN(8) && clip->z_near_enable) + dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE; + + if (clip->gb_test_enable) + dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE; + + if (scan->barycentric_interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL | + GEN6_INTERP_NONPERSPECTIVE_CENTROID | + GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) + dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE; + + if (setup->first_vertex_provoking) { + dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | + 0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | + 1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; + } else { + dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | + 1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | + 2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; + } + + dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT | + 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT | + (clip->viewport_count - 1) << GEN6_CLIP_DW3_MAX_VPINDEX__SHIFT; + + if (clip->force_rtaindex_zero) + dw3 |= GEN6_CLIP_DW3_FORCE_RTAINDEX_ZERO; + + STATIC_ASSERT(ARRAY_SIZE(rs->clip) >= 3); + rs->clip[0] = dw1; + rs->clip[1] = dw2; + rs->clip[2] = dw3; + + return true; +} + +static bool +raster_params_is_gen6_line_aa_allowed(const struct ilo_dev *dev, + const struct ilo_state_raster_params_info *params) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 251: + * + * "This field (Anti-aliasing Enable) must be 
disabled if any of the + * render targets have integer (UINT or SINT) surface format." + */ + if (params->any_integer_rt) + return false; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 321: + * + * "[DevSNB+]: This field (Hierarchical Depth Buffer Enable) must be + * disabled if Anti-aliasing Enable in 3DSTATE_SF is enabled." + */ + if (ilo_dev_gen(dev) == ILO_GEN(6) && params->hiz_enable) + return false; + + return true; +} + +static void +raster_get_gen6_effective_line(const struct ilo_dev *dev, + const struct ilo_state_raster_info *info, + struct ilo_state_raster_line_info *line) +{ + const struct ilo_state_raster_setup_info *setup = &info->setup; + const struct ilo_state_raster_params_info *params = &info->params; + + *line = info->line; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 251: + * + * "This field (Anti-aliasing Enable) is ignored when Multisample + * Rasterization Mode is MSRASTMODE_ON_xx." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 251: + * + * "Setting a Line Width of 0.0 specifies the rasterization of the + * "thinnest" (one-pixel-wide), non-antialiased lines. Note that + * this effectively overrides the effect of AAEnable (though the + * AAEnable state variable is not modified). Lines rendered with + * zero Line Width are rasterized using GIQ (Grid Intersection + * Quantization) rules as specified by the GDI and Direct3D APIs." + * + * "Software must not program a value of 0.0 when running in + * MSRASTMODE_ON_xxx modes - zero-width lines are not available + * when multisampling rasterization is enabled." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 294: + * + * "Line stipple, controlled via the Line Stipple Enable state variable + * in WM_STATE, discards certain pixels that are produced by non-AA + * line rasterization." 
+ */ + if (setup->line_msaa_enable || + !raster_params_is_gen6_line_aa_allowed(dev, params)) + line->aa_enable = false; + if (setup->line_msaa_enable || line->aa_enable) { + line->stipple_enable = false; + line->giq_enable = false; + line->giq_last_pixel = false; + } +} + +static bool +raster_validate_gen8_raster(const struct ilo_dev *dev, + const struct ilo_state_raster_info *info) +{ + const struct ilo_state_raster_setup_info *setup = &info->setup; + const struct ilo_state_raster_tri_info *tri = &info->tri; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 249: + * + * "This setting (SOLID) is required when rendering rectangle + * (RECTLIST) objects." + */ + if (tri->fill_mode_front != GEN6_FILLMODE_SOLID || + tri->fill_mode_back != GEN6_FILLMODE_SOLID) + assert(!setup->cv_is_rectangle); + + return true; +} + +static enum gen_msrast_mode +raster_setup_get_gen6_msrast_mode(const struct ilo_dev *dev, + const struct ilo_state_raster_setup_info *setup) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + if (setup->line_msaa_enable) { + return (setup->msaa_enable) ? GEN6_MSRASTMODE_ON_PATTERN : + GEN6_MSRASTMODE_ON_PIXEL; + } else { + return (setup->msaa_enable) ? GEN6_MSRASTMODE_OFF_PATTERN : + GEN6_MSRASTMODE_OFF_PIXEL; + } +} + +static int +get_gen6_line_width(const struct ilo_dev *dev, float fwidth, + bool line_aa_enable, bool line_giq_enable) +{ + int line_width; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* in U3.7 */ + line_width = (int) (fwidth * 128.0f + 0.5f); + + /* + * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1) + * pixels in the minor direction. We have to make the lines slightly + * thicker, 0.5 pixel on both sides, so that they intersect that many + * pixels. 
+ */ + if (line_aa_enable) + line_width += 128; + + line_width = CLAMP(line_width, 1, 1023); + + if (line_giq_enable && line_width == 128) + line_width = 0; + + return line_width; +} + +static int +get_gen6_point_width(const struct ilo_dev *dev, float fwidth) +{ + int point_width; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* in U8.3 */ + point_width = (int) (fwidth * 8.0f + 0.5f); + point_width = CLAMP(point_width, 1, 2047); + + return point_width; +} + +static bool +raster_set_gen7_3DSTATE_SF(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info, + const struct ilo_state_raster_line_info *line) +{ + const struct ilo_state_raster_clip_info *clip = &info->clip; + const struct ilo_state_raster_setup_info *setup = &info->setup; + const struct ilo_state_raster_point_info *point = &info->point; + const struct ilo_state_raster_tri_info *tri = &info->tri; + const struct ilo_state_raster_params_info *params = &info->params; + const enum gen_msrast_mode msrast = + raster_setup_get_gen6_msrast_mode(dev, setup); + const int line_width = get_gen6_line_width(dev, params->line_width, + line->aa_enable, line->giq_enable); + const int point_width = get_gen6_point_width(dev, params->point_width); + uint32_t dw1, dw2, dw3; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + if (!raster_validate_gen8_raster(dev, info)) + return false; + + dw1 = tri->fill_mode_front << GEN7_SF_DW1_FILL_MODE_FRONT__SHIFT | + tri->fill_mode_back << GEN7_SF_DW1_FILL_MODE_BACK__SHIFT | + tri->front_winding << GEN7_SF_DW1_FRONT_WINDING__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5)) { + enum gen_depth_format format; + + /* do it here as we want 0x0 to be valid */ + switch (tri->depth_offset_format) { + case GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT: + format = GEN6_ZFORMAT_D32_FLOAT; + break; + case GEN6_ZFORMAT_D24_UNORM_S8_UINT: + format = GEN6_ZFORMAT_D24_UNORM_X8_UINT; + break; + default: + format = tri->depth_offset_format; + break; + } + + dw1 |= 
format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT; + } + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 248: + * + * "This bit (Statistics Enable) should be set whenever clipping is + * enabled and the Statistics Enable bit is set in CLIP_STATE. It + * should be cleared if clipping is disabled or Statistics Enable in + * CLIP_STATE is clear." + */ + if (clip->stats_enable && clip->clip_enable) + dw1 |= GEN7_SF_DW1_STATISTICS; + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 258: + * + * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset + * Enable Solid , Global Depth Offset Enable Wireframe, and Global + * Depth Offset Enable Point) should be set whenever non zero depth + * bias (Slope, Bias) values are used. Setting this bit may have some + * degradation of performance for some workloads." + * + * But it seems fine to ignore that. + */ + if (tri->depth_offset_solid) + dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID; + if (tri->depth_offset_wireframe) + dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME; + if (tri->depth_offset_point) + dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT; + + if (setup->viewport_transform) + dw1 |= GEN7_SF_DW1_VIEWPORT_TRANSFORM; + + dw2 = tri->cull_mode << GEN7_SF_DW2_CULL_MODE__SHIFT | + line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT | + GEN7_SF_DW2_AA_LINE_CAP_1_0 | + msrast << GEN7_SF_DW2_MSRASTMODE__SHIFT; + + if (line->aa_enable) + dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE; + + if (ilo_dev_gen(dev) == ILO_GEN(7.5) && line->stipple_enable) + dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE; + + if (setup->scissor_enable) + dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE; + + dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE | + GEN7_SF_DW3_SUBPIXEL_8BITS; + + /* this has no effect when line_width != 0 */ + if (line->giq_last_pixel) + dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE; + + if (setup->first_vertex_provoking) { + dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | + 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | + 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; + } else { + dw3 |= 2 << 
GEN7_SF_DW3_TRI_PROVOKE__SHIFT | + 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | + 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; + } + + /* setup->point_aa_enable is ignored */ + if (!point->programmable_width) { + dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH | + point_width << GEN7_SF_DW3_POINT_WIDTH__SHIFT; + } + + STATIC_ASSERT(ARRAY_SIZE(rs->sf) >= 3); + rs->sf[0] = dw1; + rs->sf[1] = dw2; + rs->sf[2] = dw3; + + STATIC_ASSERT(ARRAY_SIZE(rs->raster) >= 4); + rs->raster[0] = 0; + rs->raster[1] = fui(params->depth_offset_const); + rs->raster[2] = fui(params->depth_offset_scale); + rs->raster[3] = fui(params->depth_offset_clamp); + + rs->line_aa_enable = line->aa_enable; + rs->line_giq_enable = line->giq_enable; + + return true; +} + +static bool +raster_set_gen8_3DSTATE_SF(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info, + const struct ilo_state_raster_line_info *line) +{ + const struct ilo_state_raster_clip_info *clip = &info->clip; + const struct ilo_state_raster_setup_info *setup = &info->setup; + const struct ilo_state_raster_point_info *point = &info->point; + const struct ilo_state_raster_params_info *params = &info->params; + const int line_width = get_gen6_line_width(dev, params->line_width, + line->aa_enable, line->giq_enable); + const int point_width = get_gen6_point_width(dev, params->point_width); + uint32_t dw1, dw2, dw3; + + ILO_DEV_ASSERT(dev, 8, 8); + + dw1 = 0; + + if (clip->stats_enable && clip->clip_enable) + dw1 |= GEN7_SF_DW1_STATISTICS; + + if (setup->viewport_transform) + dw1 |= GEN7_SF_DW1_VIEWPORT_TRANSFORM; + + dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT | + GEN7_SF_DW2_AA_LINE_CAP_1_0; + + dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE | + GEN7_SF_DW3_SUBPIXEL_8BITS; + + /* this has no effect when line_width != 0 */ + if (line->giq_last_pixel) + dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE; + + if (setup->first_vertex_provoking) { + dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | + 0 << 
GEN7_SF_DW3_LINE_PROVOKE__SHIFT | + 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; + } else { + dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | + 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | + 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; + } + + if (!point->programmable_width) { + dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH | + point_width << GEN7_SF_DW3_POINT_WIDTH__SHIFT; + } + + STATIC_ASSERT(ARRAY_SIZE(rs->sf) >= 3); + rs->sf[0] = dw1; + rs->sf[1] = dw2; + rs->sf[2] = dw3; + + return true; +} + +static bool +raster_set_gen8_3DSTATE_RASTER(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info, + const struct ilo_state_raster_line_info *line) +{ + const struct ilo_state_raster_clip_info *clip = &info->clip; + const struct ilo_state_raster_setup_info *setup = &info->setup; + const struct ilo_state_raster_point_info *point = &info->point; + const struct ilo_state_raster_tri_info *tri = &info->tri; + const struct ilo_state_raster_params_info *params = &info->params; + uint32_t dw1; + + ILO_DEV_ASSERT(dev, 8, 8); + + if (!raster_validate_gen8_raster(dev, info)) + return false; + + dw1 = tri->front_winding << GEN8_RASTER_DW1_FRONT_WINDING__SHIFT | + tri->cull_mode << GEN8_RASTER_DW1_CULL_MODE__SHIFT | + tri->fill_mode_front << GEN8_RASTER_DW1_FILL_MODE_FRONT__SHIFT | + tri->fill_mode_back << GEN8_RASTER_DW1_FILL_MODE_BACK__SHIFT; + + if (point->aa_enable) + dw1 |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE; + + /* where should line_msaa_enable be set? 
*/ + if (setup->msaa_enable) + dw1 |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE; + + if (tri->depth_offset_solid) + dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID; + if (tri->depth_offset_wireframe) + dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME; + if (tri->depth_offset_point) + dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT; + + if (line->aa_enable) + dw1 |= GEN8_RASTER_DW1_AA_LINE_ENABLE; + + if (setup->scissor_enable) + dw1 |= GEN8_RASTER_DW1_SCISSOR_ENABLE; + + if (ilo_dev_gen(dev) >= ILO_GEN(9)) { + if (clip->z_far_enable) + dw1 |= GEN9_RASTER_DW1_Z_TEST_FAR_ENABLE; + if (clip->z_near_enable) + dw1 |= GEN9_RASTER_DW1_Z_TEST_NEAR_ENABLE; + } else { + if (clip->z_near_enable) + dw1 |= GEN8_RASTER_DW1_Z_TEST_ENABLE; + } + + STATIC_ASSERT(ARRAY_SIZE(rs->raster) >= 4); + rs->raster[0] = dw1; + rs->raster[1] = fui(params->depth_offset_const); + rs->raster[2] = fui(params->depth_offset_scale); + rs->raster[3] = fui(params->depth_offset_clamp); + + rs->line_aa_enable = line->aa_enable; + rs->line_giq_enable = line->giq_enable; + + return true; +} + +static enum gen_sample_count +get_gen6_sample_count(const struct ilo_dev *dev, uint8_t sample_count) +{ + enum gen_sample_count c; + int min_gen; + + ILO_DEV_ASSERT(dev, 6, 8); + + switch (sample_count) { + case 1: + c = GEN6_NUMSAMPLES_1; + min_gen = ILO_GEN(6); + break; + case 2: + c = GEN8_NUMSAMPLES_2; + min_gen = ILO_GEN(8); + break; + case 4: + c = GEN6_NUMSAMPLES_4; + min_gen = ILO_GEN(6); + break; + case 8: + c = GEN7_NUMSAMPLES_8; + min_gen = ILO_GEN(7); + break; + case 16: + c = GEN8_NUMSAMPLES_16; + min_gen = ILO_GEN(8); + break; + default: + assert(!"unexpected sample count"); + c = GEN6_NUMSAMPLES_1; + break; + } + + assert(ilo_dev_gen(dev) >= min_gen); + + return c; +} + +static bool +raster_set_gen8_3DSTATE_MULTISAMPLE(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info) +{ + const struct ilo_state_raster_setup_info *setup = &info->setup; + const struct 
ilo_state_raster_scan_info *scan = &info->scan; + const enum gen_sample_count count = + get_gen6_sample_count(dev, scan->sample_count); + uint32_t dw1; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 307: + * + * "Setting Multisample Rasterization Mode to MSRASTMODE_xxx_PATTERN + * when Number of Multisamples == NUMSAMPLES_1 is UNDEFINED." + */ + if (setup->msaa_enable) + assert(scan->sample_count > 1); + + dw1 = scan->pixloc << GEN6_MULTISAMPLE_DW1_PIXEL_LOCATION__SHIFT | + count << GEN6_MULTISAMPLE_DW1_NUM_SAMPLES__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(rs->sample) >= 1); + rs->sample[0] = dw1; + + return true; +} + +static bool +raster_set_gen6_3DSTATE_SAMPLE_MASK(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info) +{ + const struct ilo_state_raster_scan_info *scan = &info->scan; + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 294: + * + * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field + * (Sample Mask) must be zero. + * + * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field + * must be zero." + */ + const uint32_t mask = (1 << scan->sample_count) - 1; + uint32_t dw1; + + ILO_DEV_ASSERT(dev, 6, 8); + + dw1 = (scan->sample_mask & mask) << GEN6_SAMPLE_MASK_DW1_VAL__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(rs->sample) >= 2); + rs->sample[1] = dw1; + + return true; +} + +static bool +raster_validate_gen6_wm(const struct ilo_dev *dev, + const struct ilo_state_raster_info *info) +{ + const struct ilo_state_raster_scan_info *scan = &info->scan; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (ilo_dev_gen(dev) == ILO_GEN(6)) + assert(scan->earlyz_control == GEN7_EDSC_NORMAL); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 272: + * + * "This bit (Statistics Enable) must be disabled if either of these + * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve + * Enable or Depth Buffer Resolve Enable." 
+ */ + if (scan->earlyz_op != ILO_STATE_RASTER_EARLYZ_NORMAL) + assert(!scan->stats_enable); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 273: + * + * "If this field (Depth Buffer Resolve Enable) is enabled, the Depth + * Buffer Clear and Hierarchical Depth Buffer Resolve Enable fields + * must both be disabled." + * + * "If this field (Hierarchical Depth Buffer Resolve Enable) is + * enabled, the Depth Buffer Clear and Depth Buffer Resolve Enable + * fields must both be disabled." + * + * This is guaranteed. + */ + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 314-315: + * + * "Stencil buffer clear can be performed at the same time by enabling + * Stencil Buffer Write Enable." + * + * "Note also that stencil buffer clear can be performed without depth + * buffer clear." + */ + if (scan->earlyz_stencil_clear) { + assert(scan->earlyz_op == ILO_STATE_RASTER_EARLYZ_NORMAL || + scan->earlyz_op == ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR); + } + + return true; +} + +static bool +raster_set_gen6_3dstate_wm(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info, + const struct ilo_state_raster_line_info *line) +{ + const struct ilo_state_raster_tri_info *tri = &info->tri; + const struct ilo_state_raster_setup_info *setup = &info->setup; + const struct ilo_state_raster_scan_info *scan = &info->scan; + const enum gen_msrast_mode msrast = + raster_setup_get_gen6_msrast_mode(dev, setup); + /* only scan conversion states are set, as in Gen8+ */ + uint32_t dw4, dw5, dw6; + + ILO_DEV_ASSERT(dev, 6, 6); + + if (!raster_validate_gen6_wm(dev, info)) + return false; + + dw4 = 0; + + if (scan->stats_enable) + dw4 |= GEN6_WM_DW4_STATISTICS; + + switch (scan->earlyz_op) { + case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR: + dw4 |= GEN6_WM_DW4_DEPTH_CLEAR; + break; + case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE: + dw4 |= GEN6_WM_DW4_DEPTH_RESOLVE; + break; + case ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE: + dw4 |= 
GEN6_WM_DW4_HIZ_RESOLVE; + break; + default: + if (scan->earlyz_stencil_clear) + dw4 |= GEN6_WM_DW4_DEPTH_CLEAR; + break; + } + + dw5 = GEN6_WM_DW5_AA_LINE_CAP_1_0 | /* same as in 3DSTATE_SF */ + GEN6_WM_DW5_AA_LINE_WIDTH_2_0; + + if (tri->poly_stipple_enable) + dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE; + if (line->stipple_enable) + dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE; + + dw6 = scan->zw_interp << GEN6_WM_DW6_ZW_INTERP__SHIFT | + scan->barycentric_interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT | + GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT | + msrast << GEN6_WM_DW6_MSRASTMODE__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(rs->wm) >= 3); + rs->wm[0] = dw4; + rs->wm[1] = dw5; + rs->wm[2] = dw6; + + return true; +} + +static bool +raster_set_gen8_3DSTATE_WM(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info, + const struct ilo_state_raster_line_info *line) +{ + const struct ilo_state_raster_tri_info *tri = &info->tri; + const struct ilo_state_raster_setup_info *setup = &info->setup; + const struct ilo_state_raster_scan_info *scan = &info->scan; + const enum gen_msrast_mode msrast = + raster_setup_get_gen6_msrast_mode(dev, setup); + uint32_t dw1; + + ILO_DEV_ASSERT(dev, 7, 8); + + if (!raster_validate_gen6_wm(dev, info)) + return false; + + dw1 = scan->earlyz_control << GEN7_WM_DW1_EDSC__SHIFT | + scan->zw_interp << GEN7_WM_DW1_ZW_INTERP__SHIFT | + scan->barycentric_interps << GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT | + GEN7_WM_DW1_AA_LINE_CAP_1_0 | /* same as in 3DSTATE_SF */ + GEN7_WM_DW1_AA_LINE_WIDTH_2_0 | + GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; + + if (scan->stats_enable) + dw1 |= GEN7_WM_DW1_STATISTICS; + + if (ilo_dev_gen(dev) < ILO_GEN(8)) { + switch (scan->earlyz_op) { + case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR: + dw1 |= GEN7_WM_DW1_DEPTH_CLEAR; + break; + case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE: + dw1 |= GEN7_WM_DW1_DEPTH_RESOLVE; + break; + case ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE: + dw1 |= GEN7_WM_DW1_HIZ_RESOLVE; + 
break; + default: + if (scan->earlyz_stencil_clear) + dw1 |= GEN7_WM_DW1_DEPTH_CLEAR; + break; + } + } + + if (tri->poly_stipple_enable) + dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE; + if (line->stipple_enable) + dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE; + + if (ilo_dev_gen(dev) < ILO_GEN(8)) + dw1 |= msrast << GEN7_WM_DW1_MSRASTMODE__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(rs->wm) >= 1); + rs->wm[0] = dw1; + + return true; +} + +static bool +raster_set_gen8_3dstate_wm_hz_op(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info) +{ + const struct ilo_state_raster_scan_info *scan = &info->scan; + const enum gen_sample_count count = + get_gen6_sample_count(dev, scan->sample_count); + const uint32_t mask = (1 << scan->sample_count) - 1; + uint32_t dw1, dw4; + + ILO_DEV_ASSERT(dev, 8, 8); + + dw1 = count << GEN8_WM_HZ_DW1_NUM_SAMPLES__SHIFT; + + if (scan->earlyz_stencil_clear) + dw1 |= GEN8_WM_HZ_DW1_STENCIL_CLEAR; + + switch (scan->earlyz_op) { + case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR: + dw1 |= GEN8_WM_HZ_DW1_DEPTH_CLEAR; + break; + case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE: + dw1 |= GEN8_WM_HZ_DW1_DEPTH_RESOLVE; + break; + case ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE: + dw1 |= GEN8_WM_HZ_DW1_HIZ_RESOLVE; + break; + default: + break; + } + + dw4 = (scan->sample_mask & mask) << GEN8_WM_HZ_DW4_SAMPLE_MASK__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(rs->wm) >= 3); + rs->wm[1] = dw1; + rs->wm[2] = dw4; + + return true; +} + +bool +ilo_state_raster_init(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info) +{ + assert(ilo_is_zeroed(rs, sizeof(*rs))); + return ilo_state_raster_set_info(rs, dev, info); +} + +bool +ilo_state_raster_init_for_rectlist(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + uint8_t sample_count, + enum ilo_state_raster_earlyz_op earlyz_op, + bool earlyz_stencil_clear) +{ + struct ilo_state_raster_info info; + + memset(&info, 0, sizeof(info)); + + 
info.clip.viewport_count = 1; + info.setup.cv_is_rectangle = true; + info.setup.msaa_enable = (sample_count > 1); + info.scan.sample_count = sample_count; + info.scan.sample_mask = ~0u; + info.scan.earlyz_op = earlyz_op; + info.scan.earlyz_stencil_clear = earlyz_stencil_clear; + + return ilo_state_raster_init(rs, dev, &info); +} + +bool +ilo_state_raster_set_info(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info) +{ + struct ilo_state_raster_line_info line; + bool ret = true; + + ret &= raster_set_gen6_3DSTATE_CLIP(rs, dev, info); + + raster_get_gen6_effective_line(dev, info, &line); + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + ret &= raster_set_gen8_3DSTATE_SF(rs, dev, info, &line); + ret &= raster_set_gen8_3DSTATE_RASTER(rs, dev, info, &line); + } else { + ret &= raster_set_gen7_3DSTATE_SF(rs, dev, info, &line); + } + + ret &= raster_set_gen8_3DSTATE_MULTISAMPLE(rs, dev, info); + ret &= raster_set_gen6_3DSTATE_SAMPLE_MASK(rs, dev, info); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + ret &= raster_set_gen8_3DSTATE_WM(rs, dev, info, &line); + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + ret &= raster_set_gen8_3dstate_wm_hz_op(rs, dev, info); + } else { + ret &= raster_set_gen6_3dstate_wm(rs, dev, info, &line); + } + + assert(ret); + + return ret; +} + +bool +ilo_state_raster_set_params(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_params_info *params) +{ + const bool line_aa_enable = (rs->line_aa_enable && + raster_params_is_gen6_line_aa_allowed(dev, params)); + const int line_width = get_gen6_line_width(dev, params->line_width, + line_aa_enable, rs->line_giq_enable); + + ILO_DEV_ASSERT(dev, 6, 8); + + /* modify line AA enable */ + if (rs->line_aa_enable) { + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + if (line_aa_enable) + rs->raster[0] |= GEN8_RASTER_DW1_AA_LINE_ENABLE; + else + rs->raster[0] &= ~GEN8_RASTER_DW1_AA_LINE_ENABLE; + } else { + if (line_aa_enable) + rs->sf[1] |= 
GEN7_SF_DW2_AA_LINE_ENABLE; + else + rs->sf[1] &= ~GEN7_SF_DW2_AA_LINE_ENABLE; + } + } + + /* modify line width */ + rs->sf[1] = (rs->sf[1] & ~GEN7_SF_DW2_LINE_WIDTH__MASK) | + line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; + + /* modify point width */ + if (rs->sf[2] & GEN7_SF_DW3_USE_POINT_WIDTH) { + const int point_width = get_gen6_point_width(dev, params->point_width); + + rs->sf[2] = (rs->sf[2] & ~GEN7_SF_DW3_POINT_WIDTH__MASK) | + point_width << GEN7_SF_DW3_POINT_WIDTH__SHIFT; + } + + /* modify depth offset */ + rs->raster[1] = fui(params->depth_offset_const); + rs->raster[2] = fui(params->depth_offset_scale); + rs->raster[3] = fui(params->depth_offset_clamp); + + return true; +} + +void +ilo_state_raster_full_delta(const struct ilo_state_raster *rs, + const struct ilo_dev *dev, + struct ilo_state_raster_delta *delta) +{ + delta->dirty = ILO_STATE_RASTER_3DSTATE_CLIP | + ILO_STATE_RASTER_3DSTATE_SF | + ILO_STATE_RASTER_3DSTATE_MULTISAMPLE | + ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK | + ILO_STATE_RASTER_3DSTATE_WM; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + delta->dirty |= ILO_STATE_RASTER_3DSTATE_RASTER | + ILO_STATE_RASTER_3DSTATE_WM_HZ_OP; + } +} + +void +ilo_state_raster_get_delta(const struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster *old, + struct ilo_state_raster_delta *delta) +{ + delta->dirty = 0; + + if (memcmp(rs->clip, old->clip, sizeof(rs->clip))) + delta->dirty |= ILO_STATE_RASTER_3DSTATE_CLIP; + + if (memcmp(rs->sf, old->sf, sizeof(rs->sf))) + delta->dirty |= ILO_STATE_RASTER_3DSTATE_SF; + + if (memcmp(rs->raster, old->raster, sizeof(rs->raster))) { + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + delta->dirty |= ILO_STATE_RASTER_3DSTATE_RASTER; + else + delta->dirty |= ILO_STATE_RASTER_3DSTATE_SF; + } + + if (memcmp(rs->sample, old->sample, sizeof(rs->sample))) { + delta->dirty |= ILO_STATE_RASTER_3DSTATE_MULTISAMPLE | + ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK; + } + + if (memcmp(rs->wm, old->wm, sizeof(rs->wm))) { + 
delta->dirty |= ILO_STATE_RASTER_3DSTATE_WM; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + delta->dirty |= ILO_STATE_RASTER_3DSTATE_WM_HZ_OP; + } +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_raster.h b/src/gallium/drivers/ilo/core/ilo_state_raster.h new file mode 100644 index 00000000000..0b4665b5de8 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_raster.h @@ -0,0 +1,232 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_RASTER_H +#define ILO_STATE_RASTER_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +enum ilo_state_raster_dirty_bits { /* bit flags reported in ilo_state_raster_delta.dirty, one per hardware command */ + ILO_STATE_RASTER_3DSTATE_CLIP = (1 << 0), + ILO_STATE_RASTER_3DSTATE_SF = (1 << 1), + ILO_STATE_RASTER_3DSTATE_RASTER = (1 << 2), /* only reported on Gen8+ */ + ILO_STATE_RASTER_3DSTATE_MULTISAMPLE = (1 << 3), + ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK = (1 << 4), + ILO_STATE_RASTER_3DSTATE_WM = (1 << 5), + ILO_STATE_RASTER_3DSTATE_WM_HZ_OP = (1 << 6), /* only reported on Gen8+ */ +}; + +enum ilo_state_raster_earlyz_op { /* NORMAL requests no depth clear or resolve operation */ + ILO_STATE_RASTER_EARLYZ_NORMAL, + ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR, + ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE, + ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE, +}; + +/** + * VUE readback, VertexClipTest, ClipDetermination, and primitive output. + */ +struct ilo_state_raster_clip_info { + bool clip_enable; + /* CL_INVOCATION_COUNT and CL_PRIMITIVES_COUNT; the 3DSTATE_SF Statistics bit is set only when clip_enable is set as well */ + bool stats_enable; + + uint8_t viewport_count; + bool force_rtaindex_zero; + + /* these should be mutually exclusive */ + uint8_t user_cull_enables; + uint8_t user_clip_enables; + + bool gb_test_enable; + bool xy_test_enable; + + /* far/near must be enabled together prior to Gen9, where the 3DSTATE_RASTER setup programs a single Z test enable from z_near_enable */ + bool z_far_enable; + bool z_near_enable; + bool z_near_zero; +}; + +/** + * Primitive assembly, viewport transformation, scissoring, MSAA, etc. + */ +struct ilo_state_raster_setup_info { + bool cv_is_rectangle; /* when set, both triangle fill modes are asserted to be GEN6_FILLMODE_SOLID */ + + bool first_vertex_provoking; + bool viewport_transform; + + bool scissor_enable; + + /* MSAA enables for non-lines (msaa_enable) and lines (line_msaa_enable); together they select MSRASTMODE: line_msaa_enable picks ON_xxx over OFF_xxx, msaa_enable picks xxx_PATTERN over xxx_PIXEL */ + bool msaa_enable; + bool line_msaa_enable; +}; + +/** + * 3DOBJ_POINT rasterization rules. + */ +struct ilo_state_raster_point_info { + /* ignored when msaa_enable is set; the SF/RASTER setup only programs it on Gen8 (3DSTATE_RASTER smooth point enable) */ + bool aa_enable; + + bool programmable_width; /* when false, the fixed point width from the params is programmed into 3DSTATE_SF */ +}; + +/** + * 3DOBJ_LINE rasterization rules. 
+ */ +struct ilo_state_raster_line_info { + /* ignored when line_msaa_enable is set, or when raster_params_is_gen6_line_aa_allowed() rejects the current params (e.g. an integer render target, or HiZ on Gen6) */ + bool aa_enable; + + /* ignored when line_msaa_enable or the effective aa_enable is set */ + bool stipple_enable; + bool giq_enable; + bool giq_last_pixel; +}; + +/** + * 3DOBJ_TRIANGLE rasterization rules. + */ +struct ilo_state_raster_tri_info { + enum gen_front_winding front_winding; + enum gen_cull_mode cull_mode; + enum gen_fill_mode fill_mode_front; + enum gen_fill_mode fill_mode_back; + + enum gen_depth_format depth_offset_format; /* written to 3DSTATE_SF on Gen7/7.5 only; the two depth+stencil formats are mapped to D32_FLOAT and D24_UNORM_X8_UINT */ + bool depth_offset_solid; + bool depth_offset_wireframe; + bool depth_offset_point; + + bool poly_stipple_enable; +}; + +/** + * Scan conversion. + */ +struct ilo_state_raster_scan_info { + /* PS_DEPTH_COUNT and PS_INVOCATION_COUNT; asserted to be off unless earlyz_op is ILO_STATE_RASTER_EARLYZ_NORMAL */ + bool stats_enable; + + uint8_t sample_count; /* 1, 2, 4, 8, or 16, subject to the minimum gen asserted by get_gen6_sample_count() */ + + /* pixel location for non-MSAA or 1x-MSAA */ + enum gen_pixel_location pixloc; + + uint32_t sample_mask; /* only the low sample_count bits are kept when programmed */ + + /* interpolations */ + enum gen_zw_interp zw_interp; + uint8_t barycentric_interps; + + /* earlyz_control is Gen7+ only (asserted to be GEN7_EDSC_NORMAL on Gen6); earlyz_op and earlyz_stencil_clear are used on all gens, and earlyz_stencil_clear requires earlyz_op to be NORMAL or DEPTH_CLEAR */ + enum gen_edsc_mode earlyz_control; + enum ilo_state_raster_earlyz_op earlyz_op; + bool earlyz_stencil_clear; +}; + +/** + * Raster parameters. 
+ */ +struct ilo_state_raster_params_info { + bool any_integer_rt; + bool hiz_enable; /* any_integer_rt, and hiz_enable on Gen6, make raster_params_is_gen6_line_aa_allowed() disable line AA */ + + float point_width; /* converted to U8.3 fixed point and clamped to [1, 2047] when programmed */ + float line_width; /* converted to U3.7 fixed point (plus 128 when line AA is on) and clamped to [1, 1023] when programmed */ + + /* const term will be scaled by 'r' */ + float depth_offset_const; + float depth_offset_scale; + float depth_offset_clamp; +}; + +struct ilo_state_raster_info { + struct ilo_state_raster_clip_info clip; + struct ilo_state_raster_setup_info setup; + struct ilo_state_raster_point_info point; + struct ilo_state_raster_line_info line; + struct ilo_state_raster_tri_info tri; + struct ilo_state_raster_scan_info scan; + + struct ilo_state_raster_params_info params; +}; + +struct ilo_state_raster { + uint32_t clip[3]; /* compared to detect ILO_STATE_RASTER_3DSTATE_CLIP changes */ + uint32_t sf[3]; /* 3DSTATE_SF DW1-DW3 */ + uint32_t raster[4]; /* [0]: 3DSTATE_RASTER DW1 on Gen8, 0 otherwise; [1..3]: depth offset const/scale/clamp as raw float bits */ + uint32_t sample[2]; /* [0]: 3DSTATE_MULTISAMPLE DW1; [1]: 3DSTATE_SAMPLE_MASK DW1 */ + uint32_t wm[3]; /* Gen6: 3DSTATE_WM DW4-DW6; Gen7+: [0] is 3DSTATE_WM DW1; Gen8: [1] and [2] are 3DSTATE_WM_HZ_OP DW1 and DW4 */ + + bool line_aa_enable; /* effective line AA and GIQ enables saved by the SF/RASTER setup; ilo_state_raster_set_params() reads them to recompute the line width */ + bool line_giq_enable; +}; + +struct ilo_state_raster_delta { + uint32_t dirty; /* mask of enum ilo_state_raster_dirty_bits */ +}; + +bool +ilo_state_raster_init(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info); /* asserts that rs is zeroed, then calls ilo_state_raster_set_info() */ + +bool +ilo_state_raster_init_for_rectlist(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + uint8_t sample_count, + enum ilo_state_raster_earlyz_op earlyz_op, + bool earlyz_stencil_clear); /* builds a zeroed info with one viewport, cv_is_rectangle set, msaa_enable when sample_count > 1 and a full sample mask, then calls ilo_state_raster_init() */ + +bool +ilo_state_raster_set_info(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_info *info); /* recomputes every cached dword from info */ + +bool +ilo_state_raster_set_params(struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster_params_info *params); /* updates only the line AA enable, line width, point width and depth offset values */ + +void +ilo_state_raster_full_delta(const struct ilo_state_raster *rs, + const struct ilo_dev *dev, + struct ilo_state_raster_delta *delta); /* marks every command applicable to the gen as dirty */ + +void +ilo_state_raster_get_delta(const struct ilo_state_raster *rs, + const struct ilo_dev *dev, + const struct ilo_state_raster *old, + struct ilo_state_raster_delta *delta); /* marks the commands whose cached dwords differ between rs and old */ + +#endif /* ILO_STATE_RASTER_H */ From 62bb6437187b439d5959ccab094762163713a992 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 11 May 2015 14:23:49 +0800 Subject: [PATCH 613/834] ilo: add ilo_state_cc We want to 
replace ilo_dsa_state and ilo_blend_state with ilo_state_cc. --- src/gallium/drivers/ilo/Makefile.sources | 2 + src/gallium/drivers/ilo/core/ilo_state_cc.c | 890 ++++++++++++++++++++ src/gallium/drivers/ilo/core/ilo_state_cc.h | 199 +++++ 3 files changed, 1091 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_cc.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_cc.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index b4b4498a024..e1f6d22b0f6 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -23,6 +23,8 @@ C_SOURCES := \ core/ilo_state_3d.h \ core/ilo_state_3d_bottom.c \ core/ilo_state_3d_top.c \ + core/ilo_state_cc.c \ + core/ilo_state_cc.h \ core/ilo_state_raster.c \ core/ilo_state_raster.h \ core/ilo_state_sampler.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_cc.c b/src/gallium/drivers/ilo/core/ilo_state_cc.c new file mode 100644 index 00000000000..83ee8de979c --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_cc.c @@ -0,0 +1,890 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_cc.h" + +static bool +cc_validate_gen6_stencil(const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + const struct ilo_state_cc_stencil_info *stencil = &info->stencil; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 359: + * + * "If the Depth Buffer is either undefined or does not have a surface + * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate + * stencil buffer is disabled, Stencil Test Enable must be DISABLED" + * + * From the Sandy Bridge PRM, volume 2 part 1, page 370: + * + * "This field (Stencil Test Enable) cannot be enabled if Surface + * Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM." + */ + if (stencil->test_enable) + assert(stencil->cv_has_buffer); + + return true; +} + +static bool +cc_validate_gen6_depth(const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + const struct ilo_state_cc_depth_info *depth = &info->depth; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 360: + * + * "Enabling the Depth Test function without defining a Depth Buffer is + * UNDEFINED." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 375: + * + * "A Depth Buffer must be defined before enabling writes to it, or + * operation is UNDEFINED." 
+ */ + if (depth->test_enable || depth->write_enable) + assert(depth->cv_has_buffer); + + return true; +} + +static bool +cc_set_gen6_DEPTH_STENCIL_STATE(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + const struct ilo_state_cc_stencil_info *stencil = &info->stencil; + const struct ilo_state_cc_depth_info *depth = &info->depth; + const struct ilo_state_cc_params_info *params = &info->params; + uint32_t dw0, dw1, dw2; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + if (!cc_validate_gen6_stencil(dev, info) || + !cc_validate_gen6_depth(dev, info)) + return false; + + dw0 = 0; + dw1 = 0; + if (stencil->test_enable) { + const struct ilo_state_cc_stencil_op_info *front = &stencil->front; + const struct ilo_state_cc_stencil_params_info *front_p = + ¶ms->stencil_front; + const struct ilo_state_cc_stencil_op_info *back; + const struct ilo_state_cc_stencil_params_info *back_p; + + dw0 |= GEN6_ZS_DW0_STENCIL_TEST_ENABLE; + + if (stencil->twosided_enable) { + dw0 |= GEN6_ZS_DW0_STENCIL1_ENABLE; + + back = &stencil->back; + back_p = ¶ms->stencil_back; + } else { + back = &stencil->front; + back_p = ¶ms->stencil_front; + } + + dw0 |= front->test_func << GEN6_ZS_DW0_STENCIL_FUNC__SHIFT | + front->fail_op << GEN6_ZS_DW0_STENCIL_FAIL_OP__SHIFT | + front->zfail_op << GEN6_ZS_DW0_STENCIL_ZFAIL_OP__SHIFT | + front->zpass_op << GEN6_ZS_DW0_STENCIL_ZPASS_OP__SHIFT | + back->test_func << GEN6_ZS_DW0_STENCIL1_FUNC__SHIFT | + back->fail_op << GEN6_ZS_DW0_STENCIL1_FAIL_OP__SHIFT | + back->zfail_op << GEN6_ZS_DW0_STENCIL1_ZFAIL_OP__SHIFT | + back->zpass_op << GEN6_ZS_DW0_STENCIL1_ZPASS_OP__SHIFT; + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 363: + * + * "If this field (Stencil Buffer Write Enable) is enabled, Stencil + * Test Enable must also be enabled." + * + * This is different from depth write enable, which is independent from + * depth test enable. 
+ */ + if (front_p->write_mask || back_p->write_mask) + dw0 |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE; + + dw1 |= front_p->test_mask << GEN6_ZS_DW1_STENCIL_TEST_MASK__SHIFT | + front_p->write_mask << GEN6_ZS_DW1_STENCIL_WRITE_MASK__SHIFT | + back_p->test_mask << GEN6_ZS_DW1_STENCIL1_TEST_MASK__SHIFT | + back_p->write_mask << GEN6_ZS_DW1_STENCIL1_WRITE_MASK__SHIFT; + } + + dw2 = 0; + if (depth->test_enable) { + dw2 |= GEN6_ZS_DW2_DEPTH_TEST_ENABLE | + depth->test_func << GEN6_ZS_DW2_DEPTH_FUNC__SHIFT; + } else { + dw2 |= GEN6_COMPAREFUNCTION_ALWAYS << GEN6_ZS_DW2_DEPTH_FUNC__SHIFT; + } + + /* independent from depth->test_enable */ + if (depth->write_enable) + dw2 |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE; + + STATIC_ASSERT(ARRAY_SIZE(cc->ds) >= 3); + cc->ds[0] = dw0; + cc->ds[1] = dw1; + cc->ds[2] = dw2; + + return true; +} + +static bool +cc_set_gen8_3DSTATE_WM_DEPTH_STENCIL(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + const struct ilo_state_cc_stencil_info *stencil = &info->stencil; + const struct ilo_state_cc_depth_info *depth = &info->depth; + const struct ilo_state_cc_params_info *params = &info->params; + uint32_t dw1, dw2; + + ILO_DEV_ASSERT(dev, 8, 8); + + if (!cc_validate_gen6_stencil(dev, info) || + !cc_validate_gen6_depth(dev, info)) + return false; + + dw1 = 0; + dw2 = 0; + if (stencil->test_enable) { + const struct ilo_state_cc_stencil_op_info *front = &stencil->front; + const struct ilo_state_cc_stencil_params_info *front_p = + &params->stencil_front; + const struct ilo_state_cc_stencil_op_info *back; + const struct ilo_state_cc_stencil_params_info *back_p; + + dw1 |= GEN8_ZS_DW1_STENCIL_TEST_ENABLE; + + if (stencil->twosided_enable) { + dw1 |= GEN8_ZS_DW1_STENCIL1_ENABLE; + + back = &stencil->back; + back_p = &params->stencil_back; + } else { + back = &stencil->front; + back_p = &params->stencil_front; + } + + dw1 |= front->fail_op << GEN8_ZS_DW1_STENCIL_FAIL_OP__SHIFT | + front->zfail_op <<
GEN8_ZS_DW1_STENCIL_ZFAIL_OP__SHIFT | + front->zpass_op << GEN8_ZS_DW1_STENCIL_ZPASS_OP__SHIFT | + back->test_func << GEN8_ZS_DW1_STENCIL1_FUNC__SHIFT | + back->fail_op << GEN8_ZS_DW1_STENCIL1_FAIL_OP__SHIFT | + back->zfail_op << GEN8_ZS_DW1_STENCIL1_ZFAIL_OP__SHIFT | + back->zpass_op << GEN8_ZS_DW1_STENCIL1_ZPASS_OP__SHIFT | + front->test_func << GEN8_ZS_DW1_STENCIL_FUNC__SHIFT; + + if (front_p->write_mask || back_p->write_mask) + dw1 |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE; + + dw2 |= front_p->test_mask << GEN8_ZS_DW2_STENCIL_TEST_MASK__SHIFT | + front_p->write_mask << GEN8_ZS_DW2_STENCIL_WRITE_MASK__SHIFT | + back_p->test_mask << GEN8_ZS_DW2_STENCIL1_TEST_MASK__SHIFT | + back_p->write_mask << GEN8_ZS_DW2_STENCIL1_WRITE_MASK__SHIFT; + } + + if (depth->test_enable) { + dw1 |= GEN8_ZS_DW1_DEPTH_TEST_ENABLE | + depth->test_func << GEN8_ZS_DW1_DEPTH_FUNC__SHIFT; + } else { + dw1 |= GEN6_COMPAREFUNCTION_ALWAYS << GEN8_ZS_DW1_DEPTH_FUNC__SHIFT; + } + + if (depth->write_enable) + dw1 |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE; + + STATIC_ASSERT(ARRAY_SIZE(cc->ds) >= 2); + cc->ds[0] = dw1; + cc->ds[1] = dw2; + + return true; +} + +static bool +is_dual_source_blend_factor(enum gen_blend_factor factor) +{ + switch (factor) { + case GEN6_BLENDFACTOR_SRC1_COLOR: + case GEN6_BLENDFACTOR_SRC1_ALPHA: + case GEN6_BLENDFACTOR_INV_SRC1_COLOR: + case GEN6_BLENDFACTOR_INV_SRC1_ALPHA: + return true; + default: + return false; + } +} + +static bool +cc_get_gen6_dual_source_blending(const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + const struct ilo_state_cc_blend_info *blend = &info->blend; + bool dual_source_blending; + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + dual_source_blending = (blend->rt_count && + (is_dual_source_blend_factor(blend->rt[0].rgb_src) || + is_dual_source_blend_factor(blend->rt[0].rgb_dst) || + is_dual_source_blend_factor(blend->rt[0].a_src) || + is_dual_source_blend_factor(blend->rt[0].a_dst))); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, 
page 356: + * + * "Dual Source Blending: When using "Dual Source" Render Target + * Write messages, the Source1 pixel color+alpha passed in the + * message can be selected as a src/dst blend factor. See Color + * Buffer Blending. In single-source mode, those blend factor + * selections are invalid. If SRC1 is included in a src/dst blend + * factor and a DualSource RT Write message is not utilized, + * results are UNDEFINED. (This reflects the same restriction in DX + * APIs, where undefined results are produced if "o1" is not + * written by a PS - there are no default values defined). If SRC1 + * is not included in a src/dst blend factor, dual source blending + * must be disabled." + * + * From the Ivy Bridge PRM, volume 4 part 1, page 356: + * + * "The single source message will not cause a write to the render + * target if Dual Source Blend Enable in 3DSTATE_WM is enabled." + * + * "The dual source message will revert to a single source message + * using source 0 if Dual Source Blend Enable in 3DSTATE_WM is + * disabled." + * + * Dual source blending must be enabled or disabled universally. + */ + for (i = 1; i < blend->rt_count; i++) { + assert(dual_source_blending == + (is_dual_source_blend_factor(blend->rt[i].rgb_src) || + is_dual_source_blend_factor(blend->rt[i].rgb_dst) || + is_dual_source_blend_factor(blend->rt[i].a_src) || + is_dual_source_blend_factor(blend->rt[i].a_dst))); + } + + return dual_source_blending; +} + +static bool +cc_validate_gen6_alpha(const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + const struct ilo_state_cc_alpha_info *alpha = &info->alpha; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 356: + * + * "Alpha values from the pixel shader are treated as FLOAT32 format + * for computing the AlphaToCoverage Mask." 
+ * + * From the Sandy Bridge PRM, volume 2 part 1, page 378: + * + * "If set (AlphaToCoverage Enable), Source0 Alpha is converted to a + * temporary 1/2/4-bit coverage mask and the mask bit corresponding to + * the sample# ANDed with the sample mask bit. If set, sample coverage + * is computed based on src0 alpha value. Value of 0 disables all + * samples and value of 1 enables all samples for that pixel. The same + * coverage needs to apply to all the RTs in MRT case. Further, any + * value of src0 alpha between 0 and 1 monotonically increases the + * number of enabled pixels. + * + * The same coverage needs to be applied to all the RTs in MRT case." + * + * "If set (AlphaToOne Enable), Source0 Alpha is set to 1.0f after + * (possibly) being used to generate the AlphaToCoverage coverage + * mask. + * + * The same coverage needs to be applied to all the RTs in MRT case. + * + * If Dual Source Blending is enabled, this bit must be disabled." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 382: + * + * "Alpha Test can only be enabled if Pixel Shader outputs a float + * alpha value. + * + * Alpha Test is applied independently on each render target by + * comparing that render target's alpha value against the alpha + * reference value. If the alpha test fails, the corresponding pixel + * write will be supressed only for that render target. The + * depth/stencil update will occur if alpha test passes for any render + * target." + * + * From the Sandy Bridge PRM, volume 4 part 1, page 194: + * + * "Multiple render targets are supported with the single source and + * replicate data messages. Each render target is accessed with a + * separate Render Target Write message, each with a different surface + * indicated (different binding table index). The depth buffer is + * written only by the message(s) to the last render target, indicated + * by the Last Render Target Select bit set to clear the pixel + * scoreboard bits." 
+ * + * When AlphaToCoverage/AlphaToOne/AlphaTest is enabled, it is + * required/desirable for the RT write messages to set "Source0 Alpha + * Present to RenderTarget" in the MRT case. It is also required/desirable + * for the alpha values to be FLOAT32. + */ + if (alpha->alpha_to_coverage || alpha->alpha_to_one || alpha->test_enable) + assert(alpha->cv_float_source0_alpha); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 356: + * + * "[DevSNB]: When NumSamples = 1, AlphaToCoverage and AlphaTo + * Coverage Dither both must be disabled." + */ + if (ilo_dev_gen(dev) == ILO_GEN(6) && alpha->alpha_to_coverage) + assert(alpha->cv_sample_count_one); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 378: + * + * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable) + * must be disabled." + */ + if (alpha->alpha_to_one) + assert(!cc_get_gen6_dual_source_blending(dev, info)); + + return true; +} + +static bool +cc_validate_gen6_blend(const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + const struct ilo_state_cc_blend_info *blend = &info->blend; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(blend->rt_count <= ILO_STATE_CC_BLEND_MAX_RT_COUNT); + + return true; +} + +static enum gen_blend_factor +get_dst_alpha_one_blend_factor(enum gen_blend_factor factor, bool is_rgb) +{ + switch (factor) { + case GEN6_BLENDFACTOR_DST_ALPHA: + return GEN6_BLENDFACTOR_ONE; + case GEN6_BLENDFACTOR_INV_DST_ALPHA: + return GEN6_BLENDFACTOR_ZERO; + case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE: + return (is_rgb) ? 
GEN6_BLENDFACTOR_ZERO : GEN6_BLENDFACTOR_ONE; + default: + return factor; + } +} + +static void +cc_get_gen6_effective_rt(const struct ilo_dev *dev, + const struct ilo_state_cc_info *info, + uint8_t rt_index, + struct ilo_state_cc_blend_rt_info *dst) +{ + const struct ilo_state_cc_blend_rt_info *rt = &info->blend.rt[rt_index]; + + if (rt->logicop_enable || rt->blend_enable || + rt->argb_write_disables != 0xf) + assert(rt->cv_has_buffer); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 365: + * + * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB + * variants), otherwise Logic Ops must be DISABLED." + * + * From the Broadwell PRM, volume 7, page 671: + * + * "Logic Ops are supported on all blendable render targets and render + * targets with *INT formats." + */ + if (ilo_dev_gen(dev) < ILO_GEN(8) && rt->logicop_enable) + assert(rt->cv_is_unorm); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 361: + * + * "Only certain surface formats support Color Buffer Blending. Refer + * to the Surface Format tables in Sampling Engine. Blending must be + * disabled on a RenderTarget if blending is not supported." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 365: + * + * "Color Buffer Blending and Logic Ops must not be enabled + * simultaneously, or behavior is UNDEFINED." 
+ */ + if (rt->blend_enable) + assert(!rt->cv_is_integer && !rt->logicop_enable); + + *dst = *rt; + if (rt->blend_enable) { + /* 0x0 is reserved in enum gen_blend_factor */ + assert(rt->rgb_src && rt->rgb_dst && rt->a_src && rt->a_dst); + + if (rt->force_dst_alpha_one) { + dst->rgb_src = get_dst_alpha_one_blend_factor(rt->rgb_src, true); + dst->rgb_dst = get_dst_alpha_one_blend_factor(rt->rgb_dst, true); + dst->a_src = get_dst_alpha_one_blend_factor(rt->a_src, false); + dst->a_dst = get_dst_alpha_one_blend_factor(rt->a_dst, false); + dst->force_dst_alpha_one = false; + } + } else { + dst->rgb_src = GEN6_BLENDFACTOR_ONE; + dst->rgb_dst = GEN6_BLENDFACTOR_ZERO; + dst->rgb_func = GEN6_BLENDFUNCTION_ADD; + dst->a_src = dst->rgb_src; + dst->a_dst = dst->rgb_dst; + dst->a_func = dst->rgb_func; + } +} + +static bool +cc_set_gen6_BLEND_STATE(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + const struct ilo_state_cc_alpha_info *alpha = &info->alpha; + const struct ilo_state_cc_blend_info *blend = &info->blend; + uint32_t dw_rt[2 * ILO_STATE_CC_BLEND_MAX_RT_COUNT], dw1_invariant; + uint32_t dw0, dw1; + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + if (!cc_validate_gen6_alpha(dev, info) || + !cc_validate_gen6_blend(dev, info)) + return false; + + /* + * According to the Sandy Bridge PRM, volume 2 part 1, page 360, pre-blend + * and post-blend color clamps must be enabled in most cases. For the + * other cases, they are either desirable or ignored. We can enable them + * unconditionally. + */ + dw1 = GEN6_RT_DW1_COLORCLAMP_RTFORMAT | + GEN6_RT_DW1_PRE_BLEND_CLAMP | + GEN6_RT_DW1_POST_BLEND_CLAMP; + + if (alpha->alpha_to_coverage) { + dw1 |= GEN6_RT_DW1_ALPHA_TO_COVERAGE; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 379: + * + * "[DevSNB]: This bit (AlphaToCoverage Dither Enable) must be + * disabled." 
+ */ + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + dw1 |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER; + } + + if (alpha->alpha_to_one) + dw1 |= GEN6_RT_DW1_ALPHA_TO_ONE; + + if (alpha->test_enable) { + dw1 |= GEN6_RT_DW1_ALPHA_TEST_ENABLE | + alpha->test_func << GEN6_RT_DW1_ALPHA_TEST_FUNC__SHIFT; + } else { + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 371: + * + * "When Alpha Test is disabled, Alpha Test Function must be + * COMPAREFUNCTION_ALWAYS." + */ + dw1 |= GEN6_COMPAREFUNCTION_ALWAYS << + GEN6_RT_DW1_ALPHA_TEST_FUNC__SHIFT; + } + + if (blend->dither_enable) + dw1 |= GEN6_RT_DW1_DITHER_ENABLE; + + dw1_invariant = dw1; + + for (i = 0; i < blend->rt_count; i++) { + struct ilo_state_cc_blend_rt_info rt; + + cc_get_gen6_effective_rt(dev, info, i, &rt); + + /* 0x0 is reserved for blend factors and we have to set them all */ + dw0 = rt.a_func << GEN6_RT_DW0_ALPHA_FUNC__SHIFT | + rt.a_src << GEN6_RT_DW0_SRC_ALPHA_FACTOR__SHIFT | + rt.a_dst << GEN6_RT_DW0_DST_ALPHA_FACTOR__SHIFT | + rt.rgb_func << GEN6_RT_DW0_COLOR_FUNC__SHIFT | + rt.rgb_src << GEN6_RT_DW0_SRC_COLOR_FACTOR__SHIFT | + rt.rgb_dst << GEN6_RT_DW0_DST_COLOR_FACTOR__SHIFT; + + if (rt.blend_enable) { + dw0 |= GEN6_RT_DW0_BLEND_ENABLE; + + if (rt.a_src != rt.rgb_src || + rt.a_dst != rt.rgb_dst || + rt.a_func != rt.rgb_func) + dw0 |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE; + } + + dw1 = dw1_invariant | + rt.argb_write_disables << GEN6_RT_DW1_WRITE_DISABLES__SHIFT; + + if (rt.logicop_enable) { + dw1 |= GEN6_RT_DW1_LOGICOP_ENABLE | + rt.logicop_func << GEN6_RT_DW1_LOGICOP_FUNC__SHIFT; + } + + dw_rt[2 * i + 0] = dw0; + dw_rt[2 * i + 1] = dw1; + } + + + STATIC_ASSERT(ARRAY_SIZE(cc->blend) >= ARRAY_SIZE(dw_rt)); + memcpy(&cc->blend[0], dw_rt, sizeof(uint32_t) * 2 * blend->rt_count); + cc->blend_state_count = info->blend.rt_count; + + return true; +} + +static bool +cc_set_gen8_BLEND_STATE(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + const struct 
ilo_state_cc_alpha_info *alpha = &info->alpha; + const struct ilo_state_cc_blend_info *blend = &info->blend; + uint32_t dw_rt[2 * ILO_STATE_CC_BLEND_MAX_RT_COUNT], dw0, dw1; + bool indep_alpha_enable; + uint8_t i; + + ILO_DEV_ASSERT(dev, 8, 8); + + if (!cc_validate_gen6_alpha(dev, info) || + !cc_validate_gen6_blend(dev, info)) + return false; + + indep_alpha_enable = false; + for (i = 0; i < blend->rt_count; i++) { + struct ilo_state_cc_blend_rt_info rt; + + cc_get_gen6_effective_rt(dev, info, i, &rt); + + dw0 = rt.rgb_src << GEN8_RT_DW0_SRC_COLOR_FACTOR__SHIFT | + rt.rgb_dst << GEN8_RT_DW0_DST_COLOR_FACTOR__SHIFT | + rt.rgb_func << GEN8_RT_DW0_COLOR_FUNC__SHIFT | + rt.a_src << GEN8_RT_DW0_SRC_ALPHA_FACTOR__SHIFT | + rt.a_dst << GEN8_RT_DW0_DST_ALPHA_FACTOR__SHIFT | + rt.a_func << GEN8_RT_DW0_ALPHA_FUNC__SHIFT | + rt.argb_write_disables << GEN8_RT_DW0_WRITE_DISABLES__SHIFT; + + if (rt.blend_enable) { + dw0 |= GEN8_RT_DW0_BLEND_ENABLE; + + if (rt.a_src != rt.rgb_src || + rt.a_dst != rt.rgb_dst || + rt.a_func != rt.rgb_func) + indep_alpha_enable = true; + } + + dw1 = GEN8_RT_DW1_COLORCLAMP_RTFORMAT | + GEN8_RT_DW1_PRE_BLEND_CLAMP | + GEN8_RT_DW1_POST_BLEND_CLAMP; + + if (rt.logicop_enable) { + dw1 |= GEN8_RT_DW1_LOGICOP_ENABLE | + rt.logicop_func << GEN8_RT_DW1_LOGICOP_FUNC__SHIFT; + } + + dw_rt[2 * i + 0] = dw0; + dw_rt[2 * i + 1] = dw1; + } + + dw0 = 0; + + if (alpha->alpha_to_coverage) { + dw0 |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE | + GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER; + } + + if (indep_alpha_enable) + dw0 |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE; + + if (alpha->alpha_to_one) + dw0 |= GEN8_BLEND_DW0_ALPHA_TO_ONE; + + if (alpha->test_enable) { + dw0 |= GEN8_BLEND_DW0_ALPHA_TEST_ENABLE | + alpha->test_func << GEN8_BLEND_DW0_ALPHA_TEST_FUNC__SHIFT; + } else { + dw0 |= GEN6_COMPAREFUNCTION_ALWAYS << + GEN8_BLEND_DW0_ALPHA_TEST_FUNC__SHIFT; + } + + if (blend->dither_enable) + dw0 |= GEN8_BLEND_DW0_DITHER_ENABLE; + + STATIC_ASSERT(ARRAY_SIZE(cc->blend) >= 2 + 
ARRAY_SIZE(dw_rt)); + cc->blend[1] = dw0; + memcpy(&cc->blend[2], dw_rt, sizeof(uint32_t) * 2 * blend->rt_count); + cc->blend_state_count = info->blend.rt_count; + + return true; +} + +static bool +cc_set_gen8_3DSTATE_PS_BLEND(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + const struct ilo_state_cc_alpha_info *alpha = &info->alpha; + const struct ilo_state_cc_blend_info *blend = &info->blend; + uint32_t dw1; + + ILO_DEV_ASSERT(dev, 8, 8); + + dw1 = 0; + + if (alpha->alpha_to_coverage) + dw1 |= GEN8_PS_BLEND_DW1_ALPHA_TO_COVERAGE; + + if (alpha->test_enable) + dw1 |= GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE; + + if (blend->rt_count) { + struct ilo_state_cc_blend_rt_info rt0; + uint8_t i; + + cc_get_gen6_effective_rt(dev, info, 0, &rt0); + + /* 0x0 is reserved for blend factors and we have to set them all */ + dw1 |= rt0.a_src << GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR__SHIFT | + rt0.a_dst << GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR__SHIFT | + rt0.rgb_src << GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR__SHIFT | + rt0.rgb_dst << GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR__SHIFT; + + for (i = 0; i < blend->rt_count; i++) { + if (blend->rt[i].argb_write_disables != 0xf) { + dw1 |= GEN8_PS_BLEND_DW1_WRITABLE_RT; + break; + } + } + + if (rt0.blend_enable) { + dw1 |= GEN8_PS_BLEND_DW1_BLEND_ENABLE; + + if (rt0.a_src != rt0.rgb_src || rt0.a_dst != rt0.rgb_dst) + dw1 |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE; + } + } + + STATIC_ASSERT(ARRAY_SIZE(cc->blend) >= 1); + cc->blend[0] = dw1; + + return true; +} + +static bool +cc_params_set_gen6_COLOR_CALC_STATE(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_params_info *params) +{ + uint32_t dw0; + + ILO_DEV_ASSERT(dev, 6, 8); + + dw0 = params->stencil_front.test_ref << GEN6_CC_DW0_STENCIL_REF__SHIFT | + params->stencil_back.test_ref << GEN6_CC_DW0_STENCIL1_REF__SHIFT | + GEN6_CC_DW0_ALPHATEST_FLOAT32; + + STATIC_ASSERT(ARRAY_SIZE(cc->cc) >= 6); + cc->cc[0] = dw0; + 
cc->cc[1] = fui(params->alpha_ref); + cc->cc[2] = fui(params->blend_rgba[0]); + cc->cc[3] = fui(params->blend_rgba[1]); + cc->cc[4] = fui(params->blend_rgba[2]); + cc->cc[5] = fui(params->blend_rgba[3]); + + return true; +} + +bool +ilo_state_cc_init(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + assert(ilo_is_zeroed(cc, sizeof(*cc))); + return ilo_state_cc_set_info(cc, dev, info); +} + +bool +ilo_state_cc_set_info(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_info *info) +{ + bool ret = true; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + ret &= cc_set_gen8_3DSTATE_WM_DEPTH_STENCIL(cc, dev, info); + ret &= cc_set_gen8_BLEND_STATE(cc, dev, info); + ret &= cc_set_gen8_3DSTATE_PS_BLEND(cc, dev, info); + } else { + ret &= cc_set_gen6_DEPTH_STENCIL_STATE(cc, dev, info); + ret &= cc_set_gen6_BLEND_STATE(cc, dev, info); + } + + ret &= cc_params_set_gen6_COLOR_CALC_STATE(cc, dev, &info->params); + + assert(ret); + + return ret; +} + +bool +ilo_state_cc_set_params(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_params_info *params) +{ + /* modify stencil masks */ + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + uint32_t dw1 = cc->ds[0]; + uint32_t dw2 = cc->ds[1]; + + if (dw1 & GEN8_ZS_DW1_STENCIL_TEST_ENABLE) { + const bool twosided_enable = (dw1 & GEN8_ZS_DW1_STENCIL1_ENABLE); + const struct ilo_state_cc_stencil_params_info *front_p = + &params->stencil_front; + const struct ilo_state_cc_stencil_params_info *back_p = + (twosided_enable) ?
&params->stencil_back : + &params->stencil_front; + + if (front_p->write_mask || back_p->write_mask) + dw1 |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE; + else + dw1 &= ~GEN8_ZS_DW1_STENCIL_WRITE_ENABLE; + + dw2 = + front_p->test_mask << GEN8_ZS_DW2_STENCIL_TEST_MASK__SHIFT | + front_p->write_mask << GEN8_ZS_DW2_STENCIL_WRITE_MASK__SHIFT | + back_p->test_mask << GEN8_ZS_DW2_STENCIL1_TEST_MASK__SHIFT | + back_p->write_mask << GEN8_ZS_DW2_STENCIL1_WRITE_MASK__SHIFT; + } + + cc->ds[0] = dw1; + cc->ds[1] = dw2; + } else { + uint32_t dw0 = cc->ds[0]; + uint32_t dw1 = cc->ds[1]; + + if (dw0 & GEN6_ZS_DW0_STENCIL_TEST_ENABLE) { + const bool twosided_enable = (dw0 & GEN6_ZS_DW0_STENCIL1_ENABLE); + const struct ilo_state_cc_stencil_params_info *front_p = + &params->stencil_front; + const struct ilo_state_cc_stencil_params_info *back_p = + (twosided_enable) ? &params->stencil_back : + &params->stencil_front; + + if (front_p->write_mask || back_p->write_mask) + dw0 |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE; + else + dw0 &= ~GEN6_ZS_DW0_STENCIL_WRITE_ENABLE; + + dw1 = + front_p->test_mask << GEN6_ZS_DW1_STENCIL_TEST_MASK__SHIFT | + front_p->write_mask << GEN6_ZS_DW1_STENCIL_WRITE_MASK__SHIFT | + back_p->test_mask << GEN6_ZS_DW1_STENCIL1_TEST_MASK__SHIFT | + back_p->write_mask << GEN6_ZS_DW1_STENCIL1_WRITE_MASK__SHIFT; + } + + cc->ds[0] = dw0; + cc->ds[1] = dw1; + } + + /* modify COLOR_CALC_STATE */ + cc_params_set_gen6_COLOR_CALC_STATE(cc, dev, params); + + return true; +} + +void +ilo_state_cc_full_delta(const struct ilo_state_cc *cc, + const struct ilo_dev *dev, + struct ilo_state_cc_delta *delta) +{ + delta->dirty = ILO_STATE_CC_BLEND_STATE | + ILO_STATE_CC_COLOR_CALC_STATE; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + delta->dirty |= ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL | + ILO_STATE_CC_3DSTATE_PS_BLEND; + } else { + delta->dirty |= ILO_STATE_CC_DEPTH_STENCIL_STATE; + } +} + +void +ilo_state_cc_get_delta(const struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc *old, + struct
ilo_state_cc_delta *delta) +{ + delta->dirty = 0; + + if (memcmp(cc->ds, old->ds, sizeof(cc->ds))) { + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + delta->dirty |= ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL; + else + delta->dirty |= ILO_STATE_CC_DEPTH_STENCIL_STATE; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + if (cc->blend[0] != old->blend[0]) + delta->dirty |= ILO_STATE_CC_3DSTATE_PS_BLEND; + + if (memcmp(&cc->blend[1], &old->blend[1], + sizeof(uint32_t) * (1 + 2 * cc->blend_state_count))) + delta->dirty |= ILO_STATE_CC_BLEND_STATE; + } else if (memcmp(cc->blend, old->blend, + sizeof(uint32_t) * 2 * cc->blend_state_count)) { + delta->dirty |= ILO_STATE_CC_BLEND_STATE; + } + + if (memcmp(cc->cc, old->cc, sizeof(cc->cc))) + delta->dirty |= ILO_STATE_CC_COLOR_CALC_STATE; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_cc.h b/src/gallium/drivers/ilo/core/ilo_state_cc.h new file mode 100644 index 00000000000..5b96a60f988 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_cc.h @@ -0,0 +1,199 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_CC_H +#define ILO_STATE_CC_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +/* + * From the Sandy Bridge PRM, volume 2 part 1, page 38: + * + * "Render Target Index. Specifies the render target index that will be + * used to select blend state from BLEND_STATE. + * Format = U3" + */ +#define ILO_STATE_CC_BLEND_MAX_RT_COUNT 8 + +enum ilo_state_cc_dirty_bits { + ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL = (1 << 0), + ILO_STATE_CC_3DSTATE_PS_BLEND = (1 << 1), + ILO_STATE_CC_DEPTH_STENCIL_STATE = (1 << 2), + ILO_STATE_CC_BLEND_STATE = (1 << 3), + ILO_STATE_CC_COLOR_CALC_STATE = (1 << 4), +}; + +/** + * AlphaCoverage and AlphaTest. + */ +struct ilo_state_cc_alpha_info { + bool cv_sample_count_one; + bool cv_float_source0_alpha; + + bool alpha_to_coverage; + bool alpha_to_one; + + bool test_enable; + enum gen_compare_function test_func; +}; + +struct ilo_state_cc_stencil_op_info { + enum gen_compare_function test_func; + enum gen_stencil_op fail_op; + enum gen_stencil_op zfail_op; + enum gen_stencil_op zpass_op; +}; + +/** + * StencilTest. + */ +struct ilo_state_cc_stencil_info { + bool cv_has_buffer; + + bool test_enable; + bool twosided_enable; + + struct ilo_state_cc_stencil_op_info front; + struct ilo_state_cc_stencil_op_info back; +}; + +/** + * DepthTest. 
+ */ +struct ilo_state_cc_depth_info { + bool cv_has_buffer; + + bool test_enable; + /* independent from test_enable */ + bool write_enable; + + enum gen_compare_function test_func; +}; + +struct ilo_state_cc_blend_rt_info { + bool cv_has_buffer; + bool cv_is_unorm; + bool cv_is_integer; + + uint8_t argb_write_disables; + + bool logicop_enable; + enum gen_logic_op logicop_func; + + bool blend_enable; + bool force_dst_alpha_one; + enum gen_blend_factor rgb_src; + enum gen_blend_factor rgb_dst; + enum gen_blend_function rgb_func; + enum gen_blend_factor a_src; + enum gen_blend_factor a_dst; + enum gen_blend_function a_func; +}; + +/** + * ColorBufferBlending, Dithering, and LogicOps. + */ +struct ilo_state_cc_blend_info { + const struct ilo_state_cc_blend_rt_info *rt; + uint8_t rt_count; + + bool dither_enable; +}; + +struct ilo_state_cc_stencil_params_info { + uint8_t test_ref; + uint8_t test_mask; + uint8_t write_mask; +}; + +/** + * CC parameters. + */ +struct ilo_state_cc_params_info { + float alpha_ref; + + struct ilo_state_cc_stencil_params_info stencil_front; + struct ilo_state_cc_stencil_params_info stencil_back; + + float blend_rgba[4]; +}; + +/** + * Pixel processing. 
+ */ +struct ilo_state_cc_info { + struct ilo_state_cc_alpha_info alpha; + struct ilo_state_cc_stencil_info stencil; + struct ilo_state_cc_depth_info depth; + struct ilo_state_cc_blend_info blend; + + struct ilo_state_cc_params_info params; +}; + +struct ilo_state_cc { + uint32_t ds[3]; + + uint8_t blend_state_count; + uint32_t blend[1 + 1 + 2 * ILO_STATE_CC_BLEND_MAX_RT_COUNT]; + + uint32_t cc[6]; +}; + +struct ilo_state_cc_delta { + uint32_t dirty; +}; + +bool +ilo_state_cc_init(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_info *info); + +bool +ilo_state_cc_set_info(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_info *info); + +bool +ilo_state_cc_set_params(struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc_params_info *params); + +void +ilo_state_cc_full_delta(const struct ilo_state_cc *cc, + const struct ilo_dev *dev, + struct ilo_state_cc_delta *delta); + +void +ilo_state_cc_get_delta(const struct ilo_state_cc *cc, + const struct ilo_dev *dev, + const struct ilo_state_cc *old, + struct ilo_state_cc_delta *delta); + +#endif /* ILO_STATE_CC_H */ From 3ff40be0eecfd6bbcc17471590e44042b3ffa5d3 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 29 May 2015 13:08:18 +0800 Subject: [PATCH 614/834] ilo: add ilo_state_sol --- src/gallium/drivers/ilo/Makefile.sources | 2 + src/gallium/drivers/ilo/core/ilo_state_sol.c | 320 +++++++++++++++++++ src/gallium/drivers/ilo/core/ilo_state_sol.h | 126 ++++++++ 3 files changed, 448 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_sol.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_sol.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index e1f6d22b0f6..79fb0c8d9a0 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -29,6 +29,8 @@ C_SOURCES := \ core/ilo_state_raster.h \ core/ilo_state_sampler.c 
\ core/ilo_state_sampler.h \ + core/ilo_state_sol.c \ + core/ilo_state_sol.h \ core/ilo_state_surface.c \ core/ilo_state_surface.h \ core/ilo_state_viewport.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.c b/src/gallium/drivers/ilo/core/ilo_state_sol.c new file mode 100644 index 00000000000..dbc4b894f6a --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_sol.c @@ -0,0 +1,320 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_sol.h" + +static bool +sol_stream_validate_gen7(const struct ilo_dev *dev, + const struct ilo_state_sol_stream_info *stream) +{ + uint8_t i; + + ILO_DEV_ASSERT(dev, 7, 8); + + assert(stream->vue_read_base + stream->vue_read_count <= + stream->cv_vue_attr_count); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 200: + * + * "(Stream 0 Vertex Read Offset) + * Format: U1 count of 256-bit units + * + * Specifies amount of data to skip over before reading back Stream 0 + * vertex data. Must be zero if the GS is enabled and the Output + * Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B + * unit)." + * + * "(Stream 0 Vertex Read Length) + * Format: U5-1 count of 256-bit units + * + * Specifies amount of vertex data to read back for Stream 0 vertices, + * starting at the Stream 0 Vertex Read Offset location. Maximum + * readback is 17 256-bit units (34 128-bit vertex attributes). Read + * data past the end of the valid vertex data has undefined contents, + * and therefore shouldn't be used to source stream out data. Must be + * zero (i.e., read length = 256b) if the GS is enabled and the Output + * Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B + * unit)." + */ + assert(stream->vue_read_base == 0 || stream->vue_read_base == 2); + assert(stream->vue_read_count <= 34); + + assert(stream->decl_count <= ILO_STATE_SOL_MAX_DECL_COUNT); + + for (i = 0; i < stream->decl_count; i++) { + const struct ilo_state_sol_decl_info *decl = &stream->decls[i]; + + assert(decl->is_hole || decl->attr < stream->vue_read_count); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 205: + * + * "There is only enough internal storage for the 128-bit vertex + * header and 32 128-bit vertex attributes." 
+ */ + assert(decl->attr < 33); + + assert(decl->component_base < 4 && + decl->component_base + decl->component_count <= 4); + assert(decl->buffer < ILO_STATE_SOL_MAX_BUFFER_COUNT); + } + + return true; +} + +static bool +sol_validate_gen7(const struct ilo_dev *dev, + const struct ilo_state_sol_info *info) +{ + uint8_t i; + + ILO_DEV_ASSERT(dev, 7, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 198: + * + * "This bit (Render Stream Select) is used even if SO Function Enable + * is DISABLED." + * + * From the Haswell PRM, volume 2b, page 796: + * + * "SO Function Enable must also be ENABLED in order for this field + * (Render Stream Select) to select a stream for rendering. When SO + * Function Enable is DISABLED and Rendering Disable is cleared (i.e., + * rendering is enabled), StreamID is ignored downstream of the SO + * stage, allowing any stream to be rendered." + * + * We want Gen7 behavior, but we have to require users to follow Gen7.5 + * behavior: info->sol_enable must be set for info->render_stream to work. + */ + + for (i = 0; i < ARRAY_SIZE(info->streams); i++) { + if (!sol_stream_validate_gen7(dev, &info->streams[i])) + return false; + } + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 208: + * + * "(Surface Pitch) + * [0,2048] Must be 0 or a multiple of 4 Bytes." 
+ */ + for (i = 0; i < ARRAY_SIZE(info->buffer_strides); i++) { + assert(info->buffer_strides[i] <= 2048 && + info->buffer_strides[i] % 4 == 0); + } + + return true; +} + +static bool +sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *so, + const struct ilo_dev *dev, + const struct ilo_state_sol_info *info) +{ + struct { + uint8_t offset; + uint8_t len; + } vue_read[ILO_STATE_SOL_MAX_STREAM_COUNT]; + uint8_t i; + uint32_t dw1, dw2; + + ILO_DEV_ASSERT(dev, 7, 8); + + if (!sol_validate_gen7(dev, info)) + return false; + + for (i = 0; i < ARRAY_SIZE(info->streams); i++) { + const struct ilo_state_sol_stream_info *stream = &info->streams[i]; + + vue_read[i].offset = stream->vue_read_base / 2; + /* + * In pairs minus 1. URB entries are aligned to 512-bits. There is no + * need to worry about reading past entries. + */ + vue_read[i].len = (stream->vue_read_count + 1) / 2; + if (vue_read[i].len) + vue_read[i].len--; + } + + dw1 = info->render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT | + info->tristrip_reorder << GEN7_SO_DW1_REORDER_MODE__SHIFT; + + if (info->sol_enable) + dw1 |= GEN7_SO_DW1_SO_ENABLE; + + if (info->render_disable) + dw1 |= GEN7_SO_DW1_RENDER_DISABLE; + + if (info->stats_enable) + dw1 |= GEN7_SO_DW1_STATISTICS; + + if (ilo_dev_gen(dev) < ILO_GEN(8)) { + const uint8_t buffer_enables = + ((bool) info->buffer_strides[3]) << 3 | + ((bool) info->buffer_strides[2]) << 2 | + ((bool) info->buffer_strides[1]) << 1 | + ((bool) info->buffer_strides[0]); + + dw1 |= buffer_enables << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT; + } + + dw2 = vue_read[3].offset << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT | + vue_read[3].len << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT | + vue_read[2].offset << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT | + vue_read[2].len << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT | + vue_read[1].offset << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT | + vue_read[1].len << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT | + vue_read[0].offset << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT 
| + vue_read[0].len << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(so->so) >= 4); + so->so[0] = dw1; + so->so[1] = dw2; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + uint32_t dw3, dw4; + + dw3 = info->buffer_strides[1] << GEN8_SO_DW3_BUFFER1_PITCH__SHIFT | + info->buffer_strides[0] << GEN8_SO_DW3_BUFFER0_PITCH__SHIFT; + dw4 = info->buffer_strides[3] << GEN8_SO_DW4_BUFFER3_PITCH__SHIFT | + info->buffer_strides[2] << GEN8_SO_DW4_BUFFER2_PITCH__SHIFT; + + so->so[2] = dw3; + so->so[3] = dw4; + } + + return true; +} + +static bool +sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol *so, + const struct ilo_dev *dev, + const struct ilo_state_sol_info *info, + uint8_t max_decl_count) +{ + uint64_t decl_list[ILO_STATE_SOL_MAX_DECL_COUNT]; + uint8_t decl_counts[ILO_STATE_SOL_MAX_STREAM_COUNT]; + uint8_t buffer_selects[ILO_STATE_SOL_MAX_STREAM_COUNT]; + uint32_t dw1, dw2; + uint8_t i, j; + + ILO_DEV_ASSERT(dev, 7, 8); + + memset(decl_list, 0, sizeof(decl_list[0]) * max_decl_count); + + for (i = 0; i < ARRAY_SIZE(info->streams); i++) { + const struct ilo_state_sol_stream_info *stream = &info->streams[i]; + + assert(stream->decl_count <= max_decl_count); + decl_counts[i] = stream->decl_count; + buffer_selects[i] = 0; + + for (j = 0; j < stream->decl_count; j++) { + const struct ilo_state_sol_decl_info *decl = &stream->decls[j]; + const uint8_t mask = ((1 << decl->component_count) - 1) << + decl->component_base; + uint16_t val; + + val = decl->buffer << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | + mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; + + if (decl->is_hole) + val |= GEN7_SO_DECL_HOLE_FLAG; + else + val |= decl->attr << GEN7_SO_DECL_REG_INDEX__SHIFT; + + decl_list[j] |= (uint64_t) val << (16 * i); + buffer_selects[i] |= 1 << decl->buffer; + } + } + + dw1 = buffer_selects[3] << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT | + buffer_selects[2] << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT | + buffer_selects[1] << 
GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT | + buffer_selects[0] << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT; + dw2 = decl_counts[3] << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT | + decl_counts[2] << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT | + decl_counts[1] << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT | + decl_counts[0] << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(so->so) >= 6); + so->so[4] = dw1; + so->so[5] = dw2; + + STATIC_ASSERT(ARRAY_SIZE(so->decl[0]) == 2); + memcpy(so->decl, decl_list, sizeof(so->decl[0]) * max_decl_count); + so->decl_count = max_decl_count; + + return true; +} + +bool +ilo_state_sol_init(struct ilo_state_sol *so, + const struct ilo_dev *dev, + const struct ilo_state_sol_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(so, sizeof(*so))); + assert(ilo_is_zeroed(info->data, info->data_size)); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + uint8_t max_decl_count, i; + + max_decl_count = info->streams[0].decl_count; + for (i = 1; i < ARRAY_SIZE(info->streams); i++) { + if (max_decl_count < info->streams[i].decl_count) + max_decl_count = info->streams[i].decl_count; + } + + assert(ilo_state_sol_data_size(dev, max_decl_count) <= info->data_size); + so->decl = (uint32_t (*)[2]) info->data; + + ret &= sol_set_gen7_3DSTATE_STREAMOUT(so, dev, info); + ret &= sol_set_gen7_3DSTATE_SO_DECL_LIST(so, dev, info, max_decl_count); + } + + assert(ret); + + return ret; +} + +bool +ilo_state_sol_init_disabled(struct ilo_state_sol *sol, + const struct ilo_dev *dev, + bool render_disable) +{ + struct ilo_state_sol_info info; + + memset(&info, 0, sizeof(info)); + info.render_disable = render_disable; + + return ilo_state_sol_init(sol, dev, &info); +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.h b/src/gallium/drivers/ilo/core/ilo_state_sol.h new file mode 100644 index 00000000000..c5c693e5e56 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_sol.h @@ -0,0 +1,126 @@ +/* + * Mesa 3-D 
graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_SOL_H +#define ILO_STATE_SOL_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +/* + * From the Ivy Bridge PRM, volume 2 part 1, page 193: + * + * "Incoming topologies are tagged with a 2-bit StreamID." + */ +#define ILO_STATE_SOL_MAX_STREAM_COUNT 4 + +/* + * From the Ivy Bridge PRM, volume 2 part 1, page 195: + * + * "Up to four SO buffers are supported." + */ +#define ILO_STATE_SOL_MAX_BUFFER_COUNT 4 + +/* + * From the Ivy Bridge PRM, volume 2 part 1, page 201: + * + * "All 128 decls..." + */ +#define ILO_STATE_SOL_MAX_DECL_COUNT 128 + +/** + * Output a vertex attribute. 
+ */ +struct ilo_state_sol_decl_info { + /* select an attribute from read ones */ + uint8_t attr; + bool is_hole; + + /* which components to write */ + uint8_t component_base; + uint8_t component_count; + + /* destination buffer */ + uint8_t buffer; +}; + +struct ilo_state_sol_stream_info { + /* which VUE attributes to read */ + uint8_t cv_vue_attr_count; + uint8_t vue_read_base; + uint8_t vue_read_count; + + uint8_t decl_count; + const struct ilo_state_sol_decl_info *decls; +}; + +struct ilo_state_sol_info { + void *data; + size_t data_size; + + bool sol_enable; + bool stats_enable; + enum gen_reorder_mode tristrip_reorder; + + bool render_disable; + /* ignored when SOL is disabled */ + uint8_t render_stream; + + /* a buffer is disabled when its stride is zero */ + uint16_t buffer_strides[ILO_STATE_SOL_MAX_BUFFER_COUNT]; + + struct ilo_state_sol_stream_info streams[ILO_STATE_SOL_MAX_STREAM_COUNT]; +}; + +struct ilo_state_sol { + uint32_t so[6]; + + uint32_t (*decl)[2]; + uint8_t decl_count; +}; + +static inline size_t +ilo_state_sol_data_size(const struct ilo_dev *dev, uint8_t max_decl_count) +{ + const struct ilo_state_sol *so = NULL; + return (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 
+ sizeof(so->decl[0]) * max_decl_count : 0; +} + +bool +ilo_state_sol_init(struct ilo_state_sol *sol, + const struct ilo_dev *dev, + const struct ilo_state_sol_info *info); + +bool +ilo_state_sol_init_disabled(struct ilo_state_sol *sol, + const struct ilo_dev *dev, + bool render_disable); + +#endif /* ILO_STATE_SOL_H */ From 9c77ebef2499a79fc9a0816971a6d16d50cf2954 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 28 May 2015 13:21:02 +0800 Subject: [PATCH 615/834] ilo: add ilo_state_urb --- src/gallium/drivers/ilo/Makefile.sources | 2 + src/gallium/drivers/ilo/core/ilo_state_urb.c | 769 +++++++++++++++++++ src/gallium/drivers/ilo/core/ilo_state_urb.h | 103 +++ 3 files changed, 874 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_urb.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_urb.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 79fb0c8d9a0..b04ee515c1f 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -33,6 +33,8 @@ C_SOURCES := \ core/ilo_state_sol.h \ core/ilo_state_surface.c \ core/ilo_state_surface.h \ + core/ilo_state_urb.c \ + core/ilo_state_urb.h \ core/ilo_state_viewport.c \ core/ilo_state_viewport.h \ core/ilo_state_zs.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_urb.c b/src/gallium/drivers/ilo/core/ilo_state_urb.c new file mode 100644 index 00000000000..cbd150c71c9 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_urb.c @@ -0,0 +1,769 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_urb.h" + +struct urb_configuration { + uint8_t vs_pcb_alloc_kb; + uint8_t hs_pcb_alloc_kb; + uint8_t ds_pcb_alloc_kb; + uint8_t gs_pcb_alloc_kb; + uint8_t ps_pcb_alloc_kb; + + uint8_t urb_offset_8kb; + + uint8_t vs_urb_alloc_8kb; + uint8_t hs_urb_alloc_8kb; + uint8_t ds_urb_alloc_8kb; + uint8_t gs_urb_alloc_8kb; + + uint8_t vs_entry_rows; + uint8_t hs_entry_rows; + uint8_t ds_entry_rows; + uint8_t gs_entry_rows; + + int vs_entry_count; + int hs_entry_count; + int ds_entry_count; + int gs_entry_count; +}; + +static void +urb_alloc_gen7_pcb(const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + struct urb_configuration *conf) +{ + /* + * From the Haswell PRM, volume 2b, page 940: + * + * "[0,16] (0KB - 16KB) Increments of 1KB DevHSW:GT1, DevHSW:GT2 + * [0,32] (0KB - 32KB) Increments of 2KB DevHSW:GT3" + */ + const uint8_t increment_kb = + (ilo_dev_gen(dev) >= ILO_GEN(8) || + (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 3)) ? 2 : 1; + + ILO_DEV_ASSERT(dev, 7, 8); + + /* + * Keep the strategy simple as we do not know the workloads and how + * expensive it is to change the configuration frequently. 
+ */ + if (info->hs_const_data || info->ds_const_data) { + conf->vs_pcb_alloc_kb = increment_kb * 4; + conf->hs_pcb_alloc_kb = increment_kb * 3; + conf->ds_pcb_alloc_kb = increment_kb * 3; + conf->gs_pcb_alloc_kb = increment_kb * 3; + conf->ps_pcb_alloc_kb = increment_kb * 3; + } else if (info->gs_const_data) { + conf->vs_pcb_alloc_kb = increment_kb * 6; + conf->gs_pcb_alloc_kb = increment_kb * 5; + conf->ps_pcb_alloc_kb = increment_kb * 5; + } else { + conf->vs_pcb_alloc_kb = increment_kb * 8; + conf->ps_pcb_alloc_kb = increment_kb * 8; + } + + conf->urb_offset_8kb = increment_kb * 16 / 8; +} + +static void +urb_alloc_gen6_urb(const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + struct urb_configuration *conf) +{ + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 34: + * + * "(VS URB Starting Address) Offset from the start of the URB memory + * where VS starts its allocation, specified in multiples of 8 KB." + * + * Same for other stages. + */ + const int space_avail_8kb = dev->urb_size / 8192 - conf->urb_offset_8kb; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 173: + * + * "Programming Note: If the GS stage is enabled, software must always + * allocate at least one GS URB Entry. This is true even if the GS + * thread never needs to output vertices to the urb, e.g., when only + * performing stream output. This is an artifact of the need to pass + * the GS thread an initial destination URB handle." 
+ */ + const bool force_gs_alloc = + (ilo_dev_gen(dev) == ILO_GEN(6) && info->gs_enable); + + ILO_DEV_ASSERT(dev, 6, 8); + + if (info->hs_entry_size || info->ds_entry_size) { + conf->vs_urb_alloc_8kb = space_avail_8kb / 4; + conf->hs_urb_alloc_8kb = space_avail_8kb / 4; + conf->ds_urb_alloc_8kb = space_avail_8kb / 4; + conf->gs_urb_alloc_8kb = space_avail_8kb / 4; + + if (space_avail_8kb % 4) { + assert(space_avail_8kb % 2 == 0); + conf->vs_urb_alloc_8kb++; + conf->gs_urb_alloc_8kb++; + } + } else if (info->gs_entry_size || force_gs_alloc) { + assert(space_avail_8kb % 2 == 0); + conf->vs_urb_alloc_8kb = space_avail_8kb / 2; + conf->gs_urb_alloc_8kb = space_avail_8kb / 2; + } else { + conf->vs_urb_alloc_8kb = space_avail_8kb; + } +} + +static bool +urb_init_gen6_vs_entry(const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + struct urb_configuration *conf) +{ + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 28: + * + * "(VS URB Entry Allocation Size) + * Range [0,4] = [1,5] 1024-bit URB rows" + * + * "(VS Number of URB Entries) + * Range [24,256] in multiples of 4 + * [24, 128] in multiples of 4[DevSNBGT1]" + */ + const int max_entry_count = (dev->gt == 2) ? 
256 : 252; + const int row_size = 1024 / 8; + int row_count, entry_count; + int entry_size; + + ILO_DEV_ASSERT(dev, 6, 6); + + /* VE and VS share the same VUE for each vertex */ + entry_size = info->vs_entry_size; + if (entry_size < info->ve_entry_size) + entry_size = info->ve_entry_size; + + row_count = (entry_size + row_size - 1) / row_size; + if (row_count > 5) + return false; + else if (!row_count) + row_count++; + + entry_count = conf->vs_urb_alloc_8kb * 8192 / (row_size * row_count); + if (entry_count > max_entry_count) + entry_count = max_entry_count; + entry_count &= ~3; + assert(entry_count >= 24); + + conf->vs_entry_rows = row_count; + conf->vs_entry_count = entry_count; + + return true; +} + +static bool +urb_init_gen6_gs_entry(const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + struct urb_configuration *conf) +{ + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 29: + * + * "(GS Number of URB Entries) + * Range [0,256] in multiples of 4 + * [0, 254] in multiples of 4[DevSNBGT1]" + * + * "(GS URB Entry Allocation Size) + * Range [0,4] = [1,5] 1024-bit URB rows" + */ + const int max_entry_count = (dev->gt == 2) ? 
256 : 252; + const int row_size = 1024 / 8; + int row_count, entry_count; + + ILO_DEV_ASSERT(dev, 6, 6); + + row_count = (info->gs_entry_size + row_size - 1) / row_size; + if (row_count > 5) + return false; + else if (!row_count) + row_count++; + + entry_count = conf->gs_urb_alloc_8kb * 8192 / (row_size * row_count); + if (entry_count > max_entry_count) + entry_count = max_entry_count; + entry_count &= ~3; + + conf->gs_entry_rows = row_count; + conf->gs_entry_count = entry_count; + + return true; +} + +static bool +urb_init_gen7_vs_entry(const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + struct urb_configuration *conf) +{ + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 34-35: + * + * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may + * cause performance to decrease due to banking in the URB. Element + * sizes of 16 to 20 should be programmed with six 512-bit URB rows." + * + * "(VS URB Entry Allocation Size) + * Format: U9-1 count of 512-bit units" + * + * "(VS Number of URB Entries) + * [32,704] + * [32,512] + * + * Programming Restriction: VS Number of URB Entries must be divisible + * by 8 if the VS URB Entry Allocation Size is less than 9 512-bit URB + * entries."2:0" = reserved "000b"" + * + * From the Haswell PRM, volume 2b, page 847: + * + * "(VS Number of URB Entries) + * [64,1664] DevHSW:GT3 + * [64,1664] DevHSW:GT2 + * [32,640] DevHSW:GT1" + */ + const int row_size = 512 / 8; + int row_count, entry_count; + int entry_size; + int max_entry_count, min_entry_count; + + ILO_DEV_ASSERT(dev, 7, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 35: + * + * "Programming Restriction: As the VS URB entry serves as both the + * per-vertex input and output of the VS shader, the VS URB Allocation + * Size must be sized to the maximum of the vertex input and output + * structures." 
+ * + * From the Ivy Bridge PRM, volume 2 part 1, page 42: + * + * "If the VS function is enabled, the VF-written VUEs are not required + * to have Vertex Headers, as the VS-incoming vertices are guaranteed + * to be consumed by the VS (i.e., the VS thread is responsible for + * overwriting the input vertex data)." + * + * VE and VS share the same VUE for each vertex. + */ + entry_size = info->vs_entry_size; + if (entry_size < info->ve_entry_size) + entry_size = info->ve_entry_size; + + row_count = (entry_size + row_size - 1) / row_size; + if (row_count == 5 || !row_count) + row_count++; + + entry_count = conf->vs_urb_alloc_8kb * 8192 / (row_size * row_count); + if (row_count < 9) + entry_count &= ~7; + + switch (ilo_dev_gen(dev)) { + case ILO_GEN(8): + case ILO_GEN(7.5): + max_entry_count = (dev->gt >= 2) ? 1664 : 640; + min_entry_count = (dev->gt >= 2) ? 64 : 32; + break; + case ILO_GEN(7): + max_entry_count = (dev->gt == 2) ? 704 : 512; + min_entry_count = 32; + break; + default: + assert(!"unexpected gen"); + return false; + break; + } + + if (entry_count > max_entry_count) + entry_count = max_entry_count; + else if (entry_count < min_entry_count) + return false; + + conf->vs_entry_rows = row_count; + conf->vs_entry_count = entry_count; + + return true; +} + +static bool +urb_init_gen7_hs_entry(const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + struct urb_configuration *conf) +{ + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 37: + * + * "HS Number of URB Entries must be divisible by 8 if the HS URB Entry + * Allocation Size is less than 9 512-bit URB + * entries."2:0" = reserved "000" + * + * [0,64] + * [0,32]" + * + * From the Haswell PRM, volume 2b, page 849: + * + * "(HS Number of URB Entries) + * [0,128] DevHSW:GT2 + * [0,64] DevHSW:GT1" + */ + const int row_size = 512 / 8; + int row_count, entry_count; + int max_entry_count; + + ILO_DEV_ASSERT(dev, 7, 8); + + row_count = (info->hs_entry_size + row_size - 1) / row_size; + if 
(!row_count) + row_count++; + + entry_count = conf->hs_urb_alloc_8kb * 8192 / (row_size * row_count); + if (row_count < 9) + entry_count &= ~7; + + switch (ilo_dev_gen(dev)) { + case ILO_GEN(8): + case ILO_GEN(7.5): + max_entry_count = (dev->gt >= 2) ? 128 : 64; + break; + case ILO_GEN(7): + max_entry_count = (dev->gt == 2) ? 64 : 32; + break; + default: + assert(!"unexpected gen"); + return false; + break; + } + + if (entry_count > max_entry_count) + entry_count = max_entry_count; + else if (info->hs_entry_size && !entry_count) + return false; + + conf->hs_entry_rows = row_count; + conf->hs_entry_count = entry_count; + + return true; +} + +static bool +urb_init_gen7_ds_entry(const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + struct urb_configuration *conf) +{ + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 38: + * + * "(DS URB Entry Allocation Size) + * [0,9]" + * + * "(DS Number of URB Entries) If Domain Shader Thread Dispatch is + * Enabled then the minimum number handles that must be allocated is + * 138 URB entries. + * "2:0" = reserved "000" + * + * [0,448] + * [0,288] + * + * DS Number of URB Entries must be divisible by 8 if the DS URB Entry + * Allocation Size is less than 9 512-bit URB entries.If Domain Shader + * Thread Dispatch is Enabled then the minimum number of handles that + * must be allocated is 10 URB entries." 
+ * + * From the Haswell PRM, volume 2b, page 851: + * + * "(DS Number of URB Entries) + * [0,960] DevHSW:GT2 + * [0,384] DevHSW:GT1" + */ + const int row_size = 512 / 8; + int row_count, entry_count; + int max_entry_count; + + ILO_DEV_ASSERT(dev, 7, 8); + + row_count = (info->ds_entry_size + row_size - 1) / row_size; + if (row_count > 10) + return false; + else if (!row_count) + row_count++; + + entry_count = conf->ds_urb_alloc_8kb * 8192 / (row_size * row_count); + if (row_count < 9) + entry_count &= ~7; + + switch (ilo_dev_gen(dev)) { + case ILO_GEN(8): + case ILO_GEN(7.5): + max_entry_count = (dev->gt >= 2) ? 960 : 384; + break; + case ILO_GEN(7): + max_entry_count = (dev->gt == 2) ? 448 : 288; + break; + default: + assert(!"unexpected gen"); + return false; + break; + } + + if (entry_count > max_entry_count) + entry_count = max_entry_count; + else if (info->ds_entry_size && entry_count < 10) + return false; + + conf->ds_entry_rows = row_count; + conf->ds_entry_count = entry_count; + + return true; +} + +static bool +urb_init_gen7_gs_entry(const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + struct urb_configuration *conf) +{ + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 40: + * + * "(GS Number of URB Entries) GS Number of URB Entries must be + * divisible by 8 if the GS URB Entry Allocation Size is less than 9 + * 512-bit URB entries. + * "2:0" = reserved "000" + * + * [0,320] + * [0,192]" + * + * From the Ivy Bridge PRM, volume 2 part 1, page 171: + * + * "(DUAL_INSTANCE and DUAL_OBJECT) The GS must be allocated at least + * two URB handles or behavior is UNDEFINED." + * + * From the Haswell PRM, volume 2b, page 853: + * + * "(GS Number of URB Entries) + * [0,640] DevHSW:GT2 + * [0,256] DevHSW:GT1 + * + * Only if GS is disabled can this field be programmed to 0. If GS is + * enabled this field shall be programmed to a value greater than 0. 
+ * For GS Dispatch Mode "Single", this field shall be programmed to a + * value greater than or equal to 1. For other GS Dispatch Modes, + * refer to the definition of Dispatch Mode (3DSTATE_GS) for minimum + * values of this field." + */ + const int row_size = 512 / 8; + int row_count, entry_count; + int max_entry_count; + + ILO_DEV_ASSERT(dev, 7, 8); + + row_count = (info->gs_entry_size + row_size - 1) / row_size; + if (!row_count) + row_count++; + + entry_count = conf->gs_urb_alloc_8kb * 8192 / (row_size * row_count); + if (row_count < 9) + entry_count &= ~7; + + switch (ilo_dev_gen(dev)) { + case ILO_GEN(8): + case ILO_GEN(7.5): + max_entry_count = (dev->gt >= 2) ? 640 : 256; + break; + case ILO_GEN(7): + max_entry_count = (dev->gt == 2) ? 320 : 192; + break; + default: + assert(!"unexpected gen"); + return false; + break; + } + + if (entry_count > max_entry_count) + entry_count = max_entry_count; + else if (info->gs_entry_size && entry_count < 2) + return false; + + conf->gs_entry_rows = row_count; + conf->gs_entry_count = entry_count; + + return true; +} + +static bool +urb_get_gen6_configuration(const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + struct urb_configuration *conf) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + memset(conf, 0, sizeof(*conf)); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + urb_alloc_gen7_pcb(dev, info, conf); + + urb_alloc_gen6_urb(dev, info, conf); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + if (!urb_init_gen7_vs_entry(dev, info, conf) || + !urb_init_gen7_hs_entry(dev, info, conf) || + !urb_init_gen7_ds_entry(dev, info, conf) || + !urb_init_gen7_gs_entry(dev, info, conf)) + return false; + } else { + if (!urb_init_gen6_vs_entry(dev, info, conf) || + !urb_init_gen6_gs_entry(dev, info, conf)) + return false; + } + + return true; +} + +static bool +urb_set_gen7_3dstate_push_constant_alloc(struct ilo_state_urb *urb, + const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + const struct urb_configuration *conf) +{ + 
uint32_t dw1[5]; + uint8_t sizes_kb[5], offset_kb; + int i; + + ILO_DEV_ASSERT(dev, 7, 8); + + sizes_kb[0] = conf->vs_pcb_alloc_kb; + sizes_kb[1] = conf->hs_pcb_alloc_kb; + sizes_kb[2] = conf->ds_pcb_alloc_kb; + sizes_kb[3] = conf->gs_pcb_alloc_kb; + sizes_kb[4] = conf->ps_pcb_alloc_kb; + offset_kb = 0; + + for (i = 0; i < 5; i++) { + /* careful for the valid range of offsets */ + if (sizes_kb[i]) { + dw1[i] = offset_kb << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT | + sizes_kb[i] << GEN7_PCB_ALLOC_DW1_SIZE__SHIFT; + offset_kb += sizes_kb[i]; + } else { + dw1[i] = 0; + } + } + + STATIC_ASSERT(ARRAY_SIZE(urb->pcb) >= 5); + memcpy(urb->pcb, dw1, sizeof(dw1)); + + return true; +} + +static bool +urb_set_gen6_3DSTATE_URB(struct ilo_state_urb *urb, + const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + const struct urb_configuration *conf) +{ + uint32_t dw1, dw2; + + ILO_DEV_ASSERT(dev, 6, 6); + + assert(conf->vs_entry_rows && conf->gs_entry_rows); + + dw1 = (conf->vs_entry_rows - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT | + conf->vs_entry_count << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT; + dw2 = conf->gs_entry_count << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT | + (conf->gs_entry_rows - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(urb->urb) >= 2); + urb->urb[0] = dw1; + urb->urb[1] = dw2; + + return true; +} + +static bool +urb_set_gen7_3dstate_urb(struct ilo_state_urb *urb, + const struct ilo_dev *dev, + const struct ilo_state_urb_info *info, + const struct urb_configuration *conf) +{ + uint32_t dw1[4]; + struct { + uint8_t alloc_8kb; + uint8_t entry_rows; + int entry_count; + } stages[4]; + uint8_t offset_8kb; + int i; + + ILO_DEV_ASSERT(dev, 7, 8); + + stages[0].alloc_8kb = conf->vs_urb_alloc_8kb; + stages[1].alloc_8kb = conf->hs_urb_alloc_8kb; + stages[2].alloc_8kb = conf->ds_urb_alloc_8kb; + stages[3].alloc_8kb = conf->gs_urb_alloc_8kb; + + stages[0].entry_rows = conf->vs_entry_rows; + stages[1].entry_rows = conf->hs_entry_rows; + 
stages[2].entry_rows = conf->ds_entry_rows; + stages[3].entry_rows = conf->gs_entry_rows; + + stages[0].entry_count = conf->vs_entry_count; + stages[1].entry_count = conf->hs_entry_count; + stages[2].entry_count = conf->ds_entry_count; + stages[3].entry_count = conf->gs_entry_count; + + offset_8kb = conf->urb_offset_8kb; + + for (i = 0; i < 4; i++) { + /* careful for the valid range of offsets */ + if (stages[i].alloc_8kb) { + assert(stages[i].entry_rows); + dw1[i] = + offset_8kb << GEN7_URB_DW1_OFFSET__SHIFT | + (stages[i].entry_rows - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT | + stages[i].entry_count << GEN7_URB_DW1_ENTRY_COUNT__SHIFT; + offset_8kb += stages[i].alloc_8kb; + } else { + dw1[i] = 0; + } + } + + STATIC_ASSERT(ARRAY_SIZE(urb->urb) >= 4); + memcpy(urb->urb, dw1, sizeof(dw1)); + + return true; +} + +bool +ilo_state_urb_init(struct ilo_state_urb *urb, + const struct ilo_dev *dev, + const struct ilo_state_urb_info *info) +{ + assert(ilo_is_zeroed(urb, sizeof(*urb))); + return ilo_state_urb_set_info(urb, dev, info); +} + +bool +ilo_state_urb_init_for_rectlist(struct ilo_state_urb *urb, + const struct ilo_dev *dev, + uint8_t vf_attr_count) +{ + struct ilo_state_urb_info info; + + memset(&info, 0, sizeof(info)); + info.ve_entry_size = sizeof(uint32_t) * 4 * vf_attr_count; + + return ilo_state_urb_init(urb, dev, &info); +} + +bool +ilo_state_urb_set_info(struct ilo_state_urb *urb, + const struct ilo_dev *dev, + const struct ilo_state_urb_info *info) +{ + struct urb_configuration conf; + bool ret = true; + + ret &= urb_get_gen6_configuration(dev, info, &conf); + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + ret &= urb_set_gen7_3dstate_push_constant_alloc(urb, dev, info, &conf); + ret &= urb_set_gen7_3dstate_urb(urb, dev, info, &conf); + } else { + ret &= urb_set_gen6_3DSTATE_URB(urb, dev, info, &conf); + } + + assert(ret); + + return ret; +} + +void +ilo_state_urb_full_delta(const struct ilo_state_urb *urb, + const struct ilo_dev *dev, + struct ilo_state_urb_delta 
*delta) +{ + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + delta->dirty = ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS | + ILO_STATE_URB_3DSTATE_URB_VS | + ILO_STATE_URB_3DSTATE_URB_HS | + ILO_STATE_URB_3DSTATE_URB_DS | + ILO_STATE_URB_3DSTATE_URB_GS; + } else { + delta->dirty = ILO_STATE_URB_3DSTATE_URB_VS | + ILO_STATE_URB_3DSTATE_URB_GS; + } +} + +void +ilo_state_urb_get_delta(const struct ilo_state_urb *urb, + const struct ilo_dev *dev, + const struct ilo_state_urb *old, + struct ilo_state_urb_delta *delta) +{ + delta->dirty = 0; + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + if (memcmp(urb->pcb, old->pcb, sizeof(urb->pcb))) { + delta->dirty |= ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS; + } + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 34: + * + * "3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be + * programmed in order for the programming of this state + * (3DSTATE_URB_VS) to be valid." + * + * The same is true for the other three states. 
+ */ + if (memcmp(urb->urb, old->urb, sizeof(urb->urb))) { + delta->dirty |= ILO_STATE_URB_3DSTATE_URB_VS | + ILO_STATE_URB_3DSTATE_URB_HS | + ILO_STATE_URB_3DSTATE_URB_DS | + ILO_STATE_URB_3DSTATE_URB_GS; + } + } else { + if (memcmp(urb->urb, old->urb, sizeof(uint32_t) * 2)) { + delta->dirty |= ILO_STATE_URB_3DSTATE_URB_VS | + ILO_STATE_URB_3DSTATE_URB_GS; + } + } +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_urb.h b/src/gallium/drivers/ilo/core/ilo_state_urb.h new file mode 100644 index 00000000000..9522b3bd681 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_urb.h @@ -0,0 +1,103 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_URB_H +#define ILO_STATE_URB_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +enum ilo_state_urb_dirty_bits { + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS = (1 << 0), + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS = (1 << 1), + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS = (1 << 2), + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS = (1 << 3), + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS = (1 << 4), + ILO_STATE_URB_3DSTATE_URB_VS = (1 << 5), + ILO_STATE_URB_3DSTATE_URB_HS = (1 << 6), + ILO_STATE_URB_3DSTATE_URB_DS = (1 << 7), + ILO_STATE_URB_3DSTATE_URB_GS = (1 << 8), +}; + +/** + * URB entry allocation sizes and sizes of constant data extracted from PCBs + * to threads. + */ +struct ilo_state_urb_info { + bool gs_enable; + + bool vs_const_data; + bool hs_const_data; + bool ds_const_data; + bool gs_const_data; + bool ps_const_data; + + uint16_t ve_entry_size; + uint16_t vs_entry_size; + uint16_t hs_entry_size; + uint16_t ds_entry_size; + uint16_t gs_entry_size; +}; + +struct ilo_state_urb { + uint32_t pcb[5]; + uint32_t urb[4]; +}; + +struct ilo_state_urb_delta { + uint32_t dirty; +}; + +bool +ilo_state_urb_init(struct ilo_state_urb *urb, + const struct ilo_dev *dev, + const struct ilo_state_urb_info *info); + +bool +ilo_state_urb_init_for_rectlist(struct ilo_state_urb *urb, + const struct ilo_dev *dev, + uint8_t vf_attr_count); + +bool +ilo_state_urb_set_info(struct ilo_state_urb *urb, + const struct ilo_dev *dev, + const struct ilo_state_urb_info *info); + +void +ilo_state_urb_full_delta(const struct ilo_state_urb *urb, + const struct ilo_dev *dev, + struct ilo_state_urb_delta *delta); + +void +ilo_state_urb_get_delta(const struct ilo_state_urb *urb, + const struct ilo_dev *dev, + const struct ilo_state_urb *old, + struct ilo_state_urb_delta *delta); + +#endif /* ILO_STATE_URB_H */ From 1ccab943b66de70b49cdbf3f14071fec9fe833cc Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: 
Sun, 31 May 2015 00:00:49 +0800 Subject: [PATCH 616/834] ilo: add ilo_state_vf We want to replace ilo_ve_state with ilo_state_vf. --- src/gallium/drivers/ilo/Makefile.sources | 2 + src/gallium/drivers/ilo/core/ilo_state_vf.c | 481 ++++++++++++++++++++ src/gallium/drivers/ilo/core/ilo_state_vf.h | 154 +++++++ 3 files changed, 637 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_vf.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_vf.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index b04ee515c1f..3d330f87d78 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -35,6 +35,8 @@ C_SOURCES := \ core/ilo_state_surface.h \ core/ilo_state_urb.c \ core/ilo_state_urb.h \ + core/ilo_state_vf.c \ + core/ilo_state_vf.h \ core/ilo_state_viewport.c \ core/ilo_state_viewport.h \ core/ilo_state_zs.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c new file mode 100644 index 00000000000..f9a462db254 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c @@ -0,0 +1,481 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_vf.h" + +static bool +vf_validate_gen6_elements(const struct ilo_dev *dev, + const struct ilo_state_vf_info *info) +{ + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 95: + * + * "(Source Element Offset (in bytes)) + * Format: U11 + * Range [0,2047]" + * + * From the Haswell PRM, volume 2d, page 415: + * + * "(Source Element Offset) + * Format: U12 byte offset + * ... + * [0,4095]" + * + * From the Broadwell PRM, volume 2d, page 469: + * + * "(Source Element Offset) + * Format: U12 byte offset + * ... + * [0,2047]" + */ + const uint16_t max_vertex_offset = + (ilo_dev_gen(dev) == ILO_GEN(7.5)) ? 4096 : 2048; + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(info->element_count <= ILO_STATE_VF_MAX_ELEMENT_COUNT); + + for (i = 0; i < info->element_count; i++) { + const struct ilo_state_vf_element_info *elem = &info->elements[i]; + + assert(elem->buffer < ILO_STATE_VF_MAX_BUFFER_COUNT); + assert(elem->vertex_offset < max_vertex_offset); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 86: + * + * "64-bit floating point values must be 64-bit aligned in memory, + * or UNPREDICTABLE data will be fetched. When accessing an element + * containing 64-bit floating point values, the Buffer Starting + * Address and Source Element Offset values must add to a 64-bit + * aligned address, and BufferPitch must be a multiple of 64-bits."
+ */ + if (elem->is_double) + assert(elem->vertex_offset % 8 == 0); + } + + return true; +} + +static uint32_t +get_gen6_component_controls(const struct ilo_dev *dev, + enum gen_vf_component comp_x, + enum gen_vf_component comp_y, + enum gen_vf_component comp_z, + enum gen_vf_component comp_w) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + return comp_x << GEN6_VE_DW1_COMP0__SHIFT | + comp_y << GEN6_VE_DW1_COMP1__SHIFT | + comp_z << GEN6_VE_DW1_COMP2__SHIFT | + comp_w << GEN6_VE_DW1_COMP3__SHIFT; +} + +static bool +get_gen6_edge_flag_format(const struct ilo_dev *dev, + const struct ilo_state_vf_element_info *elem, + enum gen_surface_format *format) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 94: + * + * "The Source Element Format must be set to the UINT format." + * + * From the Haswell PRM, volume 2d, page 413: + * + * "The SourceElementFormat needs to be a single-component format with + * an element which has edge flag enabled." + */ + if (elem->component_count != 1) + return false; + + /* pick the format we like */ + switch (elem->format_size) { + case 1: + *format = GEN6_FORMAT_R8_UINT; + break; + case 2: + *format = GEN6_FORMAT_R16_UINT; + break; + case 4: + *format = GEN6_FORMAT_R32_UINT; + break; + default: + return false; + break; + } + + return true; +} + +static bool +vf_set_gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf_info *info) +{ + enum gen_surface_format edge_flag_format; + uint32_t dw0, dw1; + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!vf_validate_gen6_elements(dev, info)) + return false; + + for (i = 0; i < info->element_count; i++) { + const struct ilo_state_vf_element_info *elem = &info->elements[i]; + enum gen_vf_component components[4] = { + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0, + (elem->is_integer) ? 
GEN6_VFCOMP_STORE_1_INT : + GEN6_VFCOMP_STORE_1_FP, + }; + + switch (elem->component_count) { + case 4: components[3] = GEN6_VFCOMP_STORE_SRC; /* fall through */ + case 3: components[2] = GEN6_VFCOMP_STORE_SRC; /* fall through */ + case 2: components[1] = GEN6_VFCOMP_STORE_SRC; /* fall through */ + case 1: components[0] = GEN6_VFCOMP_STORE_SRC; break; + default: + assert(!"unexpected component count"); + break; + } + + dw0 = elem->buffer << GEN6_VE_DW0_VB_INDEX__SHIFT | + GEN6_VE_DW0_VALID | + elem->format << GEN6_VE_DW0_FORMAT__SHIFT | + elem->vertex_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT; + dw1 = get_gen6_component_controls(dev, + components[0], components[1], + components[2], components[3]); + + STATIC_ASSERT(ARRAY_SIZE(vf->user_ve[i]) >= 2); + vf->user_ve[i][0] = dw0; + vf->user_ve[i][1] = dw1; + } + + vf->user_ve_count = i; + + vf->edge_flag_supported = (i && get_gen6_edge_flag_format(dev, + &info->elements[i - 1], &edge_flag_format)); + if (vf->edge_flag_supported) { + const struct ilo_state_vf_element_info *elem = &info->elements[i - 1]; + + /* without edge flag enable */ + vf->last_user_ve[0][0] = dw0; + vf->last_user_ve[0][1] = dw1; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 94: + * + * "This bit (Edge Flag Enable) must only be ENABLED on the last + * valid VERTEX_ELEMENT structure. + * + * When set, Component 0 Control must be set to + * VFCOMP_STORE_SRC, and Component 1-3 Control must be set to + * VFCOMP_NOSTORE." 
+ */ + dw0 = elem->buffer << GEN6_VE_DW0_VB_INDEX__SHIFT | + GEN6_VE_DW0_VALID | + edge_flag_format << GEN6_VE_DW0_FORMAT__SHIFT | + GEN6_VE_DW0_EDGE_FLAG_ENABLE | + elem->vertex_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT; + dw1 = get_gen6_component_controls(dev, GEN6_VFCOMP_STORE_SRC, + GEN6_VFCOMP_NOSTORE, GEN6_VFCOMP_NOSTORE, GEN6_VFCOMP_NOSTORE); + + /* with edge flag enable */ + vf->last_user_ve[1][0] = dw0; + vf->last_user_ve[1][1] = dw1; + } + + return true; +} + +static uint32_t +get_gen6_component_zeros(const struct ilo_dev *dev) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + return get_gen6_component_controls(dev, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0); +} + +static uint32_t +get_gen6_component_ids(const struct ilo_dev *dev, + bool vertexid, bool instanceid) +{ + ILO_DEV_ASSERT(dev, 6, 7.5); + + return get_gen6_component_controls(dev, + (vertexid) ? GEN6_VFCOMP_STORE_VID : GEN6_VFCOMP_STORE_0, + (instanceid) ? GEN6_VFCOMP_STORE_IID : GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0); +} + +static bool +vf_params_set_gen6_internal_ve(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf_params_info *params, + uint8_t user_ve_count) +{ + const bool prepend_ids = + (params->prepend_vertexid || params->prepend_instanceid); + uint8_t internal_ve_count = 0; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 92: + * + * "- At least one VERTEX_ELEMENT_STATE structure must be included. + * + * - Inclusion of partial VERTEX_ELEMENT_STATE structures is + * UNDEFINED. + * + * - SW must ensure that at least one vertex element is defined prior + * to issuing a 3DPRIMTIVE command, or operation is UNDEFINED. + * + * - There are no "holes" allowed in the destination vertex: NOSTORE + * components must be overwritten by subsequent components unless + * they are the trailing DWords of the vertex. 
Software must + * explicitly chose some value (probably 0) to be written into + * DWords that would otherwise be "holes"." + * + * - ... + * + * - [DevILK+] Element[0] must be valid." + */ + if (params->prepend_zeros || (!user_ve_count && !prepend_ids)) { + STATIC_ASSERT(ARRAY_SIZE(vf->internal_ve[internal_ve_count]) >= 2); + vf->internal_ve[internal_ve_count][0] = GEN6_VE_DW0_VALID; + vf->internal_ve[internal_ve_count][1] = get_gen6_component_zeros(dev); + internal_ve_count++; + } + + if (prepend_ids) { + uint32_t dw1; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + /* placeholder for 3DSTATE_VF_SGVS */ + dw1 = get_gen6_component_zeros(dev); + } else { + dw1 = get_gen6_component_ids(dev, + params->prepend_vertexid, + params->prepend_instanceid); + } + + STATIC_ASSERT(ARRAY_SIZE(vf->internal_ve[internal_ve_count]) >= 2); + vf->internal_ve[internal_ve_count][0] = GEN6_VE_DW0_VALID; + vf->internal_ve[internal_ve_count][1] = dw1; + internal_ve_count++; + } + + vf->internal_ve_count = internal_ve_count; + + return true; +} + +static bool +vf_params_set_gen8_3DSTATE_VF_SGVS(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf_params_info *params) +{ + const uint8_t attr = (params->prepend_zeros) ? 
1 : 0; + uint32_t dw1; + + ILO_DEV_ASSERT(dev, 8, 8); + + dw1 = 0; + + if (params->prepend_instanceid) { + dw1 |= GEN8_SGVS_DW1_IID_ENABLE | + 1 << GEN8_SGVS_DW1_IID_VE_COMP__SHIFT | + attr << GEN8_SGVS_DW1_IID_VE_INDEX__SHIFT; + } + + if (params->prepend_vertexid) { + dw1 |= GEN8_SGVS_DW1_VID_ENABLE | + 0 << GEN8_SGVS_DW1_VID_VE_COMP__SHIFT | + attr << GEN8_SGVS_DW1_VID_VE_INDEX__SHIFT; + } + + STATIC_ASSERT(ARRAY_SIZE(vf->sgvs) >= 1); + vf->sgvs[0] = dw1; + + return true; +} + +bool +ilo_state_vf_init(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(vf, sizeof(*vf))); + assert(ilo_is_zeroed(info->data, info->data_size)); + + assert(ilo_state_vf_data_size(dev, info->element_count) <= + info->data_size); + vf->user_ve = (uint32_t (*)[2]) info->data; + + ret &= vf_set_gen6_3DSTATE_VERTEX_ELEMENTS(vf, dev, info); + ret &= ilo_state_vf_set_params(vf, dev, &info->params); + + assert(ret); + + return ret; +} + +bool +ilo_state_vf_init_for_rectlist(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + void *data, size_t data_size, + const struct ilo_state_vf_element_info *elements, + uint8_t element_count) +{ + struct ilo_state_vf_info info; + + memset(&info, 0, sizeof(info)); + + info.data = data; + info.data_size = data_size; + + info.elements = elements; + info.element_count = element_count; + + /* + * For VUE header, + * + * DW0: Reserved: MBZ + * DW1: Render Target Array Index + * DW2: Viewport Index + * DW3: Point Width + */ + info.params.prepend_zeros = true; + + return ilo_state_vf_init(vf, dev, &info); +} + +bool +ilo_state_vf_set_params(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf_params_info *params) +{ + bool ret = true; + + ILO_DEV_ASSERT(dev, 6, 8); + + ret &= vf_params_set_gen6_internal_ve(vf, dev, params, vf->user_ve_count); + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + ret &= vf_params_set_gen8_3DSTATE_VF_SGVS(vf, dev, params); + + 
/* + * From the Sandy Bridge PRM, volume 2 part 1, page 94: + * + * "Edge flags are supported for the following primitive topology types + * only, otherwise EdgeFlagEnable must not be ENABLED. + * + * - 3DPRIM_TRILIST* + * - 3DPRIM_TRISTRIP* + * - 3DPRIM_TRIFAN* + * - 3DPRIM_POLYGON" + * + * "[DevSNB]: Edge Flags are not supported for QUADLIST primitives. + * Software may elect to convert QUADLIST primitives to some set of + * corresponding edge-flag-supported primitive types (e.g., POLYGONs) + * prior to submission to the 3D pipeline." + * + * From the Ivy Bridge PRM, volume 2 part 1, page 86: + * + * "Edge flags are supported for all primitive topology types." + * + * Both PRMs are confusing... + */ + if (params->last_element_edge_flag) { + assert(vf->edge_flag_supported); + if (ilo_dev_gen(dev) == ILO_GEN(6)) + assert(!params->cv_is_quad); + } + + if (vf->edge_flag_supported) { + assert(vf->user_ve_count); + memcpy(vf->user_ve[vf->user_ve_count - 1], + vf->last_user_ve[params->last_element_edge_flag], + sizeof(vf->user_ve[vf->user_ve_count - 1])); + } + + assert(ret); + + return ret; +} + +void +ilo_state_vf_full_delta(const struct ilo_state_vf *vf, + const struct ilo_dev *dev, + struct ilo_state_vf_delta *delta) +{ + delta->dirty = ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + delta->dirty |= ILO_STATE_VF_3DSTATE_VF_SGVS; +} + +void +ilo_state_vf_get_delta(const struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf *old, + struct ilo_state_vf_delta *delta) +{ + /* no shallow copying */ + assert(vf->user_ve != old->user_ve); + + delta->dirty = 0; + + if (vf->internal_ve_count != old->internal_ve_count || + vf->user_ve_count != old->user_ve_count || + memcmp(vf->internal_ve, old->internal_ve, + sizeof(vf->internal_ve[0]) * vf->internal_ve_count) || + memcmp(vf->user_ve, old->user_ve, + sizeof(vf->user_ve[0]) * vf->user_ve_count)) + delta->dirty |= ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS; + + if
(ilo_dev_gen(dev) >= ILO_GEN(8)) { + if (vf->sgvs[0] != old->sgvs[0]) + delta->dirty |= ILO_STATE_VF_3DSTATE_VF_SGVS; + } +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h new file mode 100644 index 00000000000..7238e661d35 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h @@ -0,0 +1,154 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_VF_H +#define ILO_STATE_VF_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +/* + * From the Sandy Bridge PRM, volume 2 part 1, page 93: + * + * "Up to 34 (DevSNB+) vertex elements are supported." + * + * "Up to 33 VBs are supported" + * + * Reserve two VEs and one VB for internal use. 
+ */ +#define ILO_STATE_VF_MAX_ELEMENT_COUNT (34 - 2) +#define ILO_STATE_VF_MAX_BUFFER_COUNT (33 - 1) + +enum ilo_state_vf_dirty_bits { + ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS = (1 << 0), + ILO_STATE_VF_3DSTATE_VF_SGVS = (1 << 1), +}; + +/** + * Fetch a 128-bit vertex attribute. + */ +struct ilo_state_vf_element_info { + uint8_t buffer; + uint16_t vertex_offset; + enum gen_surface_format format; + + uint8_t format_size; + uint8_t component_count; + bool is_integer; + bool is_double; +}; + +/** + * VF parameters. + */ +struct ilo_state_vf_params_info { + bool cv_is_quad; + + /* prepend an attribute of zeros */ + bool prepend_zeros; + + /* prepend an attribute of VertexID and/or InstanceID */ + bool prepend_vertexid; + bool prepend_instanceid; + + bool last_element_edge_flag; +}; + +struct ilo_state_vf_info { + void *data; + size_t data_size; + + const struct ilo_state_vf_element_info *elements; + uint8_t element_count; + + struct ilo_state_vf_params_info params; +}; + +struct ilo_state_vf { + /* two VEs are reserved for internal use */ + uint32_t internal_ve[2][2]; + uint32_t (*user_ve)[2]; + uint8_t internal_ve_count; + uint8_t user_ve_count; + + uint32_t sgvs[1]; + + uint32_t last_user_ve[2][2]; + bool edge_flag_supported; +}; + +struct ilo_state_vf_delta { + uint32_t dirty; +}; + +static inline size_t +ilo_state_vf_data_size(const struct ilo_dev *dev, uint8_t element_count) +{ + const struct ilo_state_vf *vf = NULL; + return sizeof(vf->user_ve[0]) * element_count; +} + +bool +ilo_state_vf_init(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf_info *info); + +bool +ilo_state_vf_init_for_rectlist(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + void *data, size_t data_size, + const struct ilo_state_vf_element_info *elements, + uint8_t element_count); + +bool +ilo_state_vf_set_params(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf_params_info *params); + +/** + * Return the number of attributes 
in the VUE. + */ +static inline uint8_t +ilo_state_vf_get_attr_count(const struct ilo_state_vf *vf) +{ + return vf->internal_ve_count + vf->user_ve_count; +} + +void +ilo_state_vf_full_delta(const struct ilo_state_vf *vf, + const struct ilo_dev *dev, + struct ilo_state_vf_delta *delta); + +void +ilo_state_vf_get_delta(const struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf *old, + struct ilo_state_vf_delta *delta); + +#endif /* ILO_STATE_VF_H */ From a0bb1c2d1787cf2bd14620bf81d6d59cebfa766a Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 12 Jun 2015 14:02:37 +0800 Subject: [PATCH 617/834] ilo: add ilo_state_sbe We want to replace ilo_kernel_routing with ilo_state_sbe. --- src/gallium/drivers/ilo/Makefile.sources | 2 + src/gallium/drivers/ilo/core/ilo_state_sbe.c | 350 +++++++++++++++++++ src/gallium/drivers/ilo/core/ilo_state_sbe.h | 103 ++++++ 3 files changed, 455 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_sbe.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_sbe.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 3d330f87d78..4a27cefe932 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -29,6 +29,8 @@ C_SOURCES := \ core/ilo_state_raster.h \ core/ilo_state_sampler.c \ core/ilo_state_sampler.h \ + core/ilo_state_sbe.c \ + core/ilo_state_sbe.h \ core/ilo_state_sol.c \ core/ilo_state_sol.h \ core/ilo_state_surface.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_sbe.c b/src/gallium/drivers/ilo/core/ilo_state_sbe.c new file mode 100644 index 00000000000..5d1d400acdd --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_sbe.c @@ -0,0 +1,350 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_sbe.h" + +static bool +sbe_validate_gen8(const struct ilo_dev *dev, + const struct ilo_state_sbe_info *info) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + assert(info->attr_count <= ILO_STATE_SBE_MAX_ATTR_COUNT); + + assert(info->vue_read_base + info->vue_read_count <= + info->cv_vue_attr_count); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 248: + * + * "(Vertex URB Entry Read Length) + * Format: U5 + * Range [1,16] + * + * Specifies the amount of URB data read for each Vertex URB entry, in + * 256-bit register increments. + * + * Programming Notes + * It is UNDEFINED to set this field to 0 indicating no Vertex URB + * data to be read." 
+ * + * "(Vertex URB Entry Read Offset) + * Format: U6 + * Range [0,63] + * + * Specifies the offset (in 256-bit units) at which Vertex URB data is + * to be read from the URB." + */ + assert(info->vue_read_base % 2 == 0 && info->vue_read_base <= 126); + assert(info->vue_read_count <= 32); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 268: + * + * "This field (Point Sprite Texture Coordinate Enable) must be + * programmed to 0 when non-point primitives are rendered." + */ + if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->point_sprite_enables) + assert(info->cv_is_point); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 246: + * + * "(Number of SF Output Attributes) 33-48: Specifies 17-32 attributes + * (# attributes = field value - 16). Swizzling performed on + * Attributes 16-31 (as required) only. Attributes 0-15 passed through + * unmodified. + * + * Note : + * + * Attribute n Component Override and Constant Source states apply to + * Attributes 16-31 (as required) instead of Attributes 0-15. E.g., + * this allows an Attribute 16-31 component to be overridden with the + * PrimitiveID value. + * + * Attribute n WrapShortest Enables still apply to Attributes 0-15. + * + * Attribute n Swizzle Select and Attribute n Source Attribute states + * are ignored and none of the swizzling functions available through + * these controls are performed." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 247: + * + * "This bit (Attribute Swizzle Enable) controls the use of the + * Attribute n Swizzle Select and Attribute n Source Attribute fields + * only. If ENABLED, those fields are used as described below. If + * DISABLED, attributes are copied from their corresponding source + * attributes, for the purposes of Swizzle Select only. 
+ * + * Note that the following fields are unaffected by this bit, and are + * therefore always used to control their respective fields: + * Attribute n Component Override X/Y/Z/W + * Attribute n Constant Source + * Attribute n WrapShortest Enables" + * + * From the Ivy Bridge PRM, volume 2 part 1, page 264: + * + * "When Attribute Swizzle Enable is ENABLED, this bit (Attribute + * Swizzle Control Mode) controls whether attributes 0-15 or 16-31 are + * subject to the following swizzle controls: + * + * - Attribute n Component Override X/Y/Z/W + * - Attribute n Constant Source + * - Attribute n Swizzle Select + * - Attribute n Source Attribute + * - Attribute n Wrap Shortest Enables" + * + * "SWIZ_16_31... Only valid when 16 or more attributes are output." + */ + assert(info->swizzle_count <= ILO_STATE_SBE_MAX_SWIZZLE_COUNT); + if (info->swizzle_16_31) { + assert(ilo_dev_gen(dev) >= ILO_GEN(7) && + info->swizzle_enable && + info->attr_count > 16); + } + + return true; +} + +static uint8_t +sbe_get_gen8_min_read_count(const struct ilo_dev *dev, + const struct ilo_state_sbe_info *info) +{ + uint8_t min_count = 0; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* minimum read count for non-swizzled attributes */ + if (!info->swizzle_enable || info->swizzle_count < info->attr_count) { + if (info->swizzle_16_31 && info->swizzle_count + 16 == info->attr_count) + min_count = 16; + else + min_count = info->attr_count; + } + + if (info->swizzle_enable) { + uint8_t i; + + for (i = 0; i < info->swizzle_count; i++) { + const struct ilo_state_sbe_swizzle_info *swizzle = + &info->swizzles[i]; + bool inputattr_facing; + + switch (swizzle->attr_select) { + case GEN6_INPUTATTR_FACING: + case GEN6_INPUTATTR_FACING_W: + inputattr_facing = true; + break; + default: + inputattr_facing = false; + break; + } + + if (min_count < swizzle->attr + inputattr_facing + 1) + min_count = swizzle->attr + inputattr_facing + 1; + } + } + + return min_count; +} + +static uint8_t +sbe_get_gen8_read_length(const 
struct ilo_dev *dev, + const struct ilo_state_sbe_info *info) +{ + uint8_t read_len; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 248: + * + * "(Vertex URB Entry Read Length) + * This field should be set to the minimum length required to read the + * maximum source attribute. The maximum source attribute is indicated + * by the maximum value of the enabled Attribute # Source Attribute if + * Attribute Swizzle Enable is set, Number of Output Attributes -1 if + * enable is not set. + * read_length = ceiling((max_source_attr+1)/2) + * + * [errata] Corruption/Hang possible if length programmed larger than + * recommended" + */ + if (info->has_min_read_count) { + read_len = info->vue_read_count; + assert(read_len == sbe_get_gen8_min_read_count(dev, info)); + } else { + read_len = sbe_get_gen8_min_read_count(dev, info); + assert(read_len <= info->vue_read_count); + } + + /* + * In pairs. URB entries are aligned to 1024-bits or 512-bits. There is + * no need to worry about reading past entries. 
+ */ + read_len = (read_len + 1) / 2; + if (!read_len) + read_len = 1; + + return read_len; +} + +static bool +sbe_set_gen8_3DSTATE_SBE(struct ilo_state_sbe *sbe, + const struct ilo_dev *dev, + const struct ilo_state_sbe_info *info) +{ + uint8_t vue_read_offset, vue_read_len; + uint8_t attr_count; + uint32_t dw1, dw2, dw3; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!sbe_validate_gen8(dev, info)) + return false; + + vue_read_offset = info->vue_read_base / 2; + vue_read_len = sbe_get_gen8_read_length(dev, info); + + attr_count = info->attr_count; + if (ilo_dev_gen(dev) == ILO_GEN(6) && info->swizzle_16_31) + attr_count += 16; + + dw1 = attr_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT | + vue_read_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + dw1 |= GEN8_SBE_DW1_USE_URB_READ_LEN | + GEN8_SBE_DW1_USE_URB_READ_OFFSET | + vue_read_offset << GEN8_SBE_DW1_URB_READ_OFFSET__SHIFT; + } else { + dw1 |= vue_read_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->swizzle_16_31) + dw1 |= GEN7_SBE_DW1_ATTR_SWIZZLE_16_31; + + if (info->swizzle_enable) + dw1 |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE; + + dw1 |= (info->point_sprite_origin_lower_left) ? 
+ GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT : + GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT; + + dw2 = info->point_sprite_enables; + dw3 = info->const_interp_enables; + + STATIC_ASSERT(ARRAY_SIZE(sbe->sbe) >= 3); + sbe->sbe[0] = dw1; + sbe->sbe[1] = dw2; + sbe->sbe[2] = dw3; + + return true; +} + +static bool +sbe_set_gen8_3DSTATE_SBE_SWIZ(struct ilo_state_sbe *sbe, + const struct ilo_dev *dev, + const struct ilo_state_sbe_info *info) +{ + uint16_t swiz[ILO_STATE_SBE_MAX_SWIZZLE_COUNT]; + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + for (i = 0; i < info->swizzle_count; i++) { + const struct ilo_state_sbe_swizzle_info *swizzle = &info->swizzles[i]; + + /* U5 */ + assert(swizzle->attr < 32); + swiz[i] = swizzle->attr_select << GEN8_SBE_SWIZ_SWIZZLE_SELECT__SHIFT | + swizzle->attr << GEN8_SBE_SWIZ_SRC_ATTR__SHIFT; + + if (swizzle->force_zeros) { + swiz[i] |= GEN8_SBE_SWIZ_OVERRIDE_W | + GEN8_SBE_SWIZ_OVERRIDE_Z | + GEN8_SBE_SWIZ_OVERRIDE_Y | + GEN8_SBE_SWIZ_OVERRIDE_X | + GEN8_SBE_SWIZ_CONST_0000; + } + } + + for (; i < ARRAY_SIZE(swiz); i++) { + swiz[i] = GEN6_INPUTATTR_NORMAL << GEN8_SBE_SWIZ_SWIZZLE_SELECT__SHIFT | + i << GEN8_SBE_SWIZ_SRC_ATTR__SHIFT; + } + + STATIC_ASSERT(sizeof(sbe->swiz) == sizeof(swiz)); + memcpy(sbe->swiz, swiz, sizeof(swiz)); + + return true; +} + +bool +ilo_state_sbe_init(struct ilo_state_sbe *sbe, + const struct ilo_dev *dev, + const struct ilo_state_sbe_info *info) +{ + assert(ilo_is_zeroed(sbe, sizeof(*sbe))); + return ilo_state_sbe_set_info(sbe, dev, info); +} + +bool +ilo_state_sbe_init_for_rectlist(struct ilo_state_sbe *sbe, + const struct ilo_dev *dev, + uint8_t read_base, + uint8_t read_count) +{ + struct ilo_state_sbe_info info; + + memset(&info, 0, sizeof(info)); + info.attr_count = read_count; + info.cv_vue_attr_count = read_base + read_count; + info.vue_read_base = read_base; + info.vue_read_count = read_count; + info.has_min_read_count = true; + + return ilo_state_sbe_set_info(sbe, dev, &info); +} + +bool 
+ilo_state_sbe_set_info(struct ilo_state_sbe *sbe, + const struct ilo_dev *dev, + const struct ilo_state_sbe_info *info) +{ + bool ret = true; + + ILO_DEV_ASSERT(dev, 6, 8); + /* pack info into the 3DSTATE_SBE and 3DSTATE_SBE_SWIZ dwords of sbe */ + ret &= sbe_set_gen8_3DSTATE_SBE(sbe, dev, info); + ret &= sbe_set_gen8_3DSTATE_SBE_SWIZ(sbe, dev, info); + + assert(ret); + /* return the combined result so a failure is not hidden without asserts */ + return ret; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_sbe.h b/src/gallium/drivers/ilo/core/ilo_state_sbe.h new file mode 100644 index 00000000000..122999a9e94 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_sbe.h @@ -0,0 +1,103 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE.
+ * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_SBE_H +#define ILO_STATE_SBE_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +/* + * From the Sandy Bridge PRM, volume 2 part 1, page 264: + * + * "Number of SF Output Attributes sets the number of attributes that will + * be output from the SF stage, not including position. This can be used + * to specify up to 32, and may differ from the number of input + * attributes." + * + * "The first or last set of 16 attributes can be swizzled according to + * certain state fields." + */ +#define ILO_STATE_SBE_MAX_ATTR_COUNT 32 +#define ILO_STATE_SBE_MAX_SWIZZLE_COUNT 16 + +struct ilo_state_sbe_swizzle_info { + /* select an attribute from read ones */ + enum gen_inputattr_select attr_select; + uint8_t attr; + + bool force_zeros; +}; + +struct ilo_state_sbe_info { + uint8_t attr_count; + + /* which VUE attributes to read */ + uint8_t cv_vue_attr_count; + uint8_t vue_read_base; + uint8_t vue_read_count; + bool has_min_read_count; + + bool cv_is_point; + bool point_sprite_origin_lower_left; + /* force sprite coordinates to the four corner vertices of the point */ + uint32_t point_sprite_enables; + + /* force attr at the provoking vertex to a0 and zero to a1/a2 */ + uint32_t const_interp_enables; + + bool swizzle_enable; + /* swizzle attribute 16 to 31 instead; Gen7+ only */ + bool swizzle_16_31; + uint8_t swizzle_count; + const struct ilo_state_sbe_swizzle_info *swizzles; +}; + +struct ilo_state_sbe { + uint32_t sbe[3]; + uint32_t swiz[8]; +}; + +bool +ilo_state_sbe_init(struct ilo_state_sbe *sbe, + const struct ilo_dev *dev, + const struct ilo_state_sbe_info *info); + +bool +ilo_state_sbe_init_for_rectlist(struct ilo_state_sbe *sbe, + const struct ilo_dev *dev, + uint8_t read_base, + uint8_t read_count); + +bool +ilo_state_sbe_set_info(struct ilo_state_sbe *sbe, + const struct ilo_dev *dev, + const struct ilo_state_sbe_info *info); + +#endif /* ILO_STATE_SBE_H */ From 
df9f846ac6153e171fbcf661bad19168b336a703 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 30 May 2015 00:58:51 +0800 Subject: [PATCH 618/834] ilo: add ilo_state_{vs,hs,ds,gs} We want to make ilo_shader_cso a union of ilo_state_{vs,hs,ds,gs} and ps payload. --- src/gallium/drivers/ilo/Makefile.sources | 2 + .../drivers/ilo/core/ilo_state_shader.c | 737 ++++++++++++++++++ .../drivers/ilo/core/ilo_state_shader.h | 180 +++++ 3 files changed, 919 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_shader.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_shader.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 4a27cefe932..2a630779848 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -31,6 +31,8 @@ C_SOURCES := \ core/ilo_state_sampler.h \ core/ilo_state_sbe.c \ core/ilo_state_sbe.h \ + core/ilo_state_shader.c \ + core/ilo_state_shader.h \ core/ilo_state_sol.c \ core/ilo_state_sol.h \ core/ilo_state_surface.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.c b/src/gallium/drivers/ilo/core/ilo_state_shader.c new file mode 100644 index 00000000000..f67326c7f10 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_shader.c @@ -0,0 +1,737 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_shader.h" + +enum vertex_stage { + STAGE_VS, + STAGE_HS, + STAGE_DS, + STAGE_GS, +}; + +struct vertex_ff { + uint8_t grf_start; + uint8_t scratch_space; + + uint8_t sampler_count; + uint8_t surface_count; + bool has_uav; + + uint8_t vue_read_offset; + uint8_t vue_read_len; + + uint8_t user_clip_enables; +}; + +static bool +vertex_validate_gen6_kernel(const struct ilo_dev *dev, + enum vertex_stage stage, + const struct ilo_state_shader_kernel_info *kernel) +{ + /* + * "Dispatch GRF Start Register for URB Data" is U4 for GS and U5 for + * others. + */ + const uint8_t max_grf_start = (stage == STAGE_GS) ? 
16 : 32; + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 134: + * + * "(Per-Thread Scratch Space) + * Range [0,11] indicating [1K Bytes, 2M Bytes]" + */ + const uint32_t max_scratch_size = 2 * 1024 * 1024; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* we do not want to save it */ + assert(!kernel->offset); + + assert(kernel->grf_start < max_grf_start); + assert(kernel->scratch_size <= max_scratch_size); + + return true; +} + +static bool +vertex_validate_gen6_urb(const struct ilo_dev *dev, + enum vertex_stage stage, + const struct ilo_state_shader_urb_info *urb) +{ + /* "Vertex/Patch URB Entry Read Offset" is U6, in pairs */ + const uint8_t max_read_base = 63 * 2; + /* + * "Vertex/Patch URB Entry Read Length" is limited to 64 for DS and U6 for + * others, in pairs + */ + const uint8_t max_read_count = ((stage == STAGE_DS) ? 64 : 63) * 2; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(urb->read_base + urb->read_count <= urb->cv_input_attr_count); + + assert(urb->read_base % 2 == 0 && urb->read_base <= max_read_base); + + /* + * There is no need to worry about reading past entries, as URB entries are + * aligned to 1024-bits (Gen6) or 512-bits (Gen7+). + */ + assert(urb->read_count <= max_read_count); + + return true; +} + +static bool +vertex_get_gen6_ff(const struct ilo_dev *dev, + enum vertex_stage stage, + const struct ilo_state_shader_kernel_info *kernel, + const struct ilo_state_shader_resource_info *resource, + const struct ilo_state_shader_urb_info *urb, + struct vertex_ff *ff) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + if (!vertex_validate_gen6_kernel(dev, stage, kernel) || + !vertex_validate_gen6_urb(dev, stage, urb)) + return false; + + ff->grf_start = kernel->grf_start; + /* next power of two, starting from 1KB */ + ff->scratch_space = (kernel->scratch_size > 1024) ? + (util_last_bit(kernel->scratch_size - 1) - 10): 0; + + ff->sampler_count = (resource->sampler_count <= 12) ? 
+ (resource->sampler_count + 3) / 4 : 4; + ff->surface_count = resource->surface_count; + ff->has_uav = resource->has_uav; + + ff->vue_read_offset = urb->read_base / 2; + ff->vue_read_len = (urb->read_count + 1) / 2; + + /* need to read something unless VUE handles are included */ + switch (stage) { + case STAGE_VS: + if (!ff->vue_read_len) + ff->vue_read_len = 1; + + /* one GRF per attribute */ + assert(kernel->grf_start + urb->read_count * 2 <= 128); + break; + case STAGE_GS: + if (ilo_dev_gen(dev) == ILO_GEN(6) && !ff->vue_read_len) + ff->vue_read_len = 1; + break; + default: + break; + } + + ff->user_clip_enables = urb->user_clip_enables; + + return true; +} + +static uint16_t +vs_get_gen6_thread_count(const struct ilo_dev *dev, + const struct ilo_state_vs_info *info) +{ + uint16_t thread_count; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* Maximum Number of Threads of 3DSTATE_VS */ + switch (ilo_dev_gen(dev)) { + case ILO_GEN(8): + thread_count = 504; + break; + case ILO_GEN(7.5): + thread_count = (dev->gt >= 2) ? 
280 : 70; + break; + case ILO_GEN(7): + case ILO_GEN(6): + default: + thread_count = dev->thread_count; + break; + } + + return thread_count - 1; +} + +static bool +vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs, + const struct ilo_dev *dev, + const struct ilo_state_vs_info *info) +{ + struct vertex_ff ff; + uint16_t thread_count; + uint32_t dw2, dw3, dw4, dw5; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, + &info->resource, &info->urb, &ff)) + return false; + + thread_count = vs_get_gen6_thread_count(dev, info); + + dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | + ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; + + if (false) + dw2 |= GEN6_THREADDISP_FP_MODE_ALT; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) + dw2 |= GEN75_THREADDISP_ACCESS_UAV; + + dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + + dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT | + ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | + ff.vue_read_offset << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT; + + dw5 = 0; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + dw5 |= thread_count << GEN75_VS_DW5_MAX_THREADS__SHIFT; + else + dw5 |= thread_count << GEN6_VS_DW5_MAX_THREADS__SHIFT; + + if (info->stats_enable) + dw5 |= GEN6_VS_DW5_STATISTICS; + if (info->dispatch_enable) + dw5 |= GEN6_VS_DW5_VS_ENABLE; + + STATIC_ASSERT(ARRAY_SIZE(vs->vs) >= 5); + vs->vs[0] = dw2; + vs->vs[1] = dw3; + vs->vs[2] = dw4; + vs->vs[3] = dw5; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT; + + return true; +} + +static uint16_t +hs_get_gen7_thread_count(const struct ilo_dev *dev, + const struct ilo_state_hs_info *info) +{ + uint16_t thread_count; + + ILO_DEV_ASSERT(dev, 7, 8); + + /* Maximum Number of Threads of 3DSTATE_HS */ + switch (ilo_dev_gen(dev)) { + case ILO_GEN(8): + thread_count = 504; + break; + case ILO_GEN(7.5): + thread_count = (dev->gt >= 
2) ? 256 : 70; + break; + case ILO_GEN(7): + default: + thread_count = dev->thread_count; + break; + } + + return thread_count - 1; +} + +static bool +hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs, + const struct ilo_dev *dev, + const struct ilo_state_hs_info *info) +{ + struct vertex_ff ff; + uint16_t thread_count; + uint32_t dw1, dw2, dw4, dw5; + + ILO_DEV_ASSERT(dev, 7, 8); + + if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, + &info->resource, &info->urb, &ff)) + return false; + + thread_count = hs_get_gen7_thread_count(dev, info); + + dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | + ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT; + else + dw1 |= thread_count << GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT; + + dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT; + + if (info->dispatch_enable) + dw2 |= GEN7_HS_DW2_HS_ENABLE; + if (info->stats_enable) + dw2 |= GEN7_HS_DW2_STATISTICS; + + dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + + dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES | + ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT | + ff.vue_read_len << GEN7_HS_DW5_URB_READ_LEN__SHIFT | + ff.vue_read_offset << GEN7_HS_DW5_URB_READ_OFFSET__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) + dw5 |= GEN75_HS_DW5_ACCESS_UAV; + + STATIC_ASSERT(ARRAY_SIZE(hs->hs) >= 4); + hs->hs[0] = dw1; + hs->hs[1] = dw2; + hs->hs[2] = dw4; + hs->hs[3] = dw5; + + return true; +} + +static bool +ds_set_gen7_3DSTATE_TE(struct ilo_state_ds *ds, + const struct ilo_dev *dev, + const struct ilo_state_ds_info *info) +{ + uint32_t dw1; + + ILO_DEV_ASSERT(dev, 7, 8); + + dw1 = 0; + + if (info->dispatch_enable) { + dw1 |= GEN7_TE_DW1_MODE_HW | + GEN7_TE_DW1_TE_ENABLE; + } + + STATIC_ASSERT(ARRAY_SIZE(ds->te) >= 3); + ds->te[0] = dw1; + ds->te[1] = fui(63.0f); + ds->te[2] = fui(64.0f); + + return true; +} + +static uint16_t 
+ds_get_gen7_thread_count(const struct ilo_dev *dev, + const struct ilo_state_ds_info *info) +{ + uint16_t thread_count; + + ILO_DEV_ASSERT(dev, 7, 8); + + /* Maximum Number of Threads of 3DSTATE_DS */ + switch (ilo_dev_gen(dev)) { + case ILO_GEN(8): + thread_count = 504; + break; + case ILO_GEN(7.5): + thread_count = (dev->gt >= 2) ? 280 : 70; + break; + case ILO_GEN(7): + default: + thread_count = dev->thread_count; + break; + } + + return thread_count - 1; +} + +static bool +ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds, + const struct ilo_dev *dev, + const struct ilo_state_ds_info *info) +{ + struct vertex_ff ff; + uint16_t thread_count; + uint32_t dw2, dw3, dw4, dw5; + + ILO_DEV_ASSERT(dev, 7, 8); + + if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, + &info->resource, &info->urb, &ff)) + return false; + + thread_count = ds_get_gen7_thread_count(dev, info); + + dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | + ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) + dw2 |= GEN75_THREADDISP_ACCESS_UAV; + + dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + + dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT | + ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT | + ff.vue_read_offset << GEN7_DS_DW4_URB_READ_OFFSET__SHIFT; + + dw5 = 0; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + dw5 |= thread_count << GEN75_DS_DW5_MAX_THREADS__SHIFT; + else + dw5 |= thread_count << GEN7_DS_DW5_MAX_THREADS__SHIFT; + + if (info->stats_enable) + dw5 |= GEN7_DS_DW5_STATISTICS; + if (info->dispatch_enable) + dw5 |= GEN7_DS_DW5_DS_ENABLE; + + STATIC_ASSERT(ARRAY_SIZE(ds->ds) >= 5); + ds->ds[0] = dw2; + ds->ds[1] = dw3; + ds->ds[2] = dw4; + ds->ds[3] = dw5; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT; + + return true; +} + +static bool +gs_get_gen6_ff(const struct ilo_dev *dev, + const struct 
ilo_state_gs_info *info, + struct vertex_ff *ff) +{ + const struct ilo_state_shader_urb_info *urb = &info->urb; + const struct ilo_state_gs_sol_info *sol = &info->sol; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, + &info->resource, &info->urb, ff)) + return false; + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 168-169: + * + * "[0,62] indicating [1,63] 16B units" + * + * "Programming Restrictions: The vertex size must be programmed as a + * multiple of 32B units with the following exception: Rendering is + * disabled (as per SOL stage state) and the vertex size output by the + * GS thread is 16B. + * + * If rendering is enabled (as per SOL state) the vertex size must be + * programmed as a multiple of 32B units. In other words, the only + * time software can program a vertex size with an odd number of 16B + * units is when rendering is disabled." + */ + assert(urb->output_attr_count <= 63); + if (!sol->render_disable) + assert(urb->output_attr_count % 2 == 0); + + return true; +} + +static uint16_t +gs_get_gen6_thread_count(const struct ilo_dev *dev, + const struct ilo_state_gs_info *info) +{ + const struct ilo_state_gs_sol_info *sol = &info->sol; + uint16_t thread_count; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* Maximum Number of Threads of 3DSTATE_GS */ + switch (ilo_dev_gen(dev)) { + case ILO_GEN(8): + thread_count = 504; + break; + case ILO_GEN(7.5): + thread_count = (dev->gt >= 2) ? 256 : 70; + break; + case ILO_GEN(7): + case ILO_GEN(6): + default: + thread_count = dev->thread_count; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 154: + * + * "Maximum Number of Threads valid range is [0,27] when Rendering + * Enabled bit is set." + * + * According to the classic driver, [0, 20] for GT1. + */ + if (!sol->render_disable) + thread_count = (dev->gt == 2) ? 
27 : 20; + break; + } + + return thread_count - 1; +} + +static bool +gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs, + const struct ilo_dev *dev, + const struct ilo_state_gs_info *info) +{ + const struct ilo_state_gs_sol_info *sol = &info->sol; + struct vertex_ff ff; + uint16_t thread_count; + uint32_t dw2, dw3, dw4, dw5, dw6; + + ILO_DEV_ASSERT(dev, 6, 6); + + if (!gs_get_gen6_ff(dev, info, &ff)) + return false; + + thread_count = gs_get_gen6_thread_count(dev, info); + + dw2 = GEN6_THREADDISP_SPF | + ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | + ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; + + dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + + dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT | + ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT | + ff.grf_start << GEN6_GS_DW4_URB_GRF_START__SHIFT; + + dw5 = thread_count << GEN6_GS_DW5_MAX_THREADS__SHIFT; + + if (info->stats_enable) + dw5 |= GEN6_GS_DW5_STATISTICS; + if (sol->stats_enable) + dw5 |= GEN6_GS_DW5_SO_STATISTICS; + if (!sol->render_disable) + dw5 |= GEN6_GS_DW5_RENDER_ENABLE; + + dw6 = 0; + + /* GEN7_REORDER_TRAILING is handled by the kernel */ + if (sol->tristrip_reorder == GEN7_REORDER_LEADING) + dw6 |= GEN6_GS_DW6_REORDER_LEADING_ENABLE; + + if (sol->sol_enable) { + dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE; + + if (sol->svbi_post_inc) { + dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE | + sol->svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT; + } + } + + if (info->dispatch_enable) + dw6 |= GEN6_GS_DW6_GS_ENABLE; + + STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5); + gs->gs[0] = dw2; + gs->gs[1] = dw3; + gs->gs[2] = dw4; + gs->gs[3] = dw5; + gs->gs[4] = dw6; + + return true; +} + +static uint8_t +gs_get_gen7_vertex_size(const struct ilo_dev *dev, + const struct ilo_state_gs_info *info) +{ + const struct ilo_state_shader_urb_info *urb = &info->urb; + + ILO_DEV_ASSERT(dev, 7, 8); + + return (urb->output_attr_count) ? 
urb->output_attr_count - 1 : 0; +} + +static bool +gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs, + const struct ilo_dev *dev, + const struct ilo_state_gs_info *info) +{ + struct vertex_ff ff; + uint16_t thread_count; + uint8_t vertex_size; + uint32_t dw2, dw3, dw4, dw5; + + ILO_DEV_ASSERT(dev, 7, 8); + + if (!gs_get_gen6_ff(dev, info, &ff)) + return false; + + thread_count = gs_get_gen6_thread_count(dev, info); + vertex_size = gs_get_gen7_vertex_size(dev, info); + + dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | + ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) + dw2 |= GEN75_THREADDISP_ACCESS_UAV; + + dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + + dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT | + 0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT | + ff.vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT | + GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES | + ff.vue_read_offset << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT | + ff.grf_start << GEN7_GS_DW4_URB_GRF_START__SHIFT; + + dw5 = 0; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + dw5 = thread_count << GEN75_GS_DW5_MAX_THREADS__SHIFT; + else + dw5 = thread_count << GEN7_GS_DW5_MAX_THREADS__SHIFT; + + if (info->stats_enable) + dw5 |= GEN7_GS_DW5_STATISTICS; + if (info->dispatch_enable) + dw5 |= GEN7_GS_DW5_GS_ENABLE; + + STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5); + gs->gs[0] = dw2; + gs->gs[1] = dw3; + gs->gs[2] = dw4; + gs->gs[3] = dw5; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT; + + return true; +} + +bool +ilo_state_vs_init(struct ilo_state_vs *vs, + const struct ilo_dev *dev, + const struct ilo_state_vs_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(vs, sizeof(*vs))); + + ret &= vs_set_gen6_3DSTATE_VS(vs, dev, info); + + assert(ret); + + return ret; +} + +bool +ilo_state_vs_init_disabled(struct ilo_state_vs *vs, + const struct ilo_dev *dev) +{ + 
struct ilo_state_vs_info info; + + memset(&info, 0, sizeof(info)); + + return ilo_state_vs_init(vs, dev, &info); +} + +bool +ilo_state_hs_init(struct ilo_state_hs *hs, + const struct ilo_dev *dev, + const struct ilo_state_hs_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(hs, sizeof(*hs))); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + ret &= hs_set_gen7_3DSTATE_HS(hs, dev, info); + + assert(ret); + + return ret; +} + +bool +ilo_state_hs_init_disabled(struct ilo_state_hs *hs, + const struct ilo_dev *dev) +{ + struct ilo_state_hs_info info; + + memset(&info, 0, sizeof(info)); + + return ilo_state_hs_init(hs, dev, &info); +} + +bool +ilo_state_ds_init(struct ilo_state_ds *ds, + const struct ilo_dev *dev, + const struct ilo_state_ds_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(ds, sizeof(*ds))); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + ret &= ds_set_gen7_3DSTATE_TE(ds, dev, info); + ret &= ds_set_gen7_3DSTATE_DS(ds, dev, info); + } + + assert(ret); + + return ret; +} + +bool +ilo_state_ds_init_disabled(struct ilo_state_ds *ds, + const struct ilo_dev *dev) +{ + struct ilo_state_ds_info info; + + memset(&info, 0, sizeof(info)); + + return ilo_state_ds_init(ds, dev, &info); +} + +bool +ilo_state_gs_init(struct ilo_state_gs *gs, + const struct ilo_dev *dev, + const struct ilo_state_gs_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(gs, sizeof(*gs))); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + ret &= gs_set_gen7_3DSTATE_GS(gs, dev, info); + else + ret &= gs_set_gen6_3DSTATE_GS(gs, dev, info); + + assert(ret); + + return ret; +} + +bool +ilo_state_gs_init_disabled(struct ilo_state_gs *gs, + const struct ilo_dev *dev) +{ + struct ilo_state_gs_info info; + + memset(&info, 0, sizeof(info)); + + return ilo_state_gs_init(gs, dev, &info); +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.h b/src/gallium/drivers/ilo/core/ilo_state_shader.h new file mode 100644 index 00000000000..c1dbf23dfb3 --- /dev/null +++ 
b/src/gallium/drivers/ilo/core/ilo_state_shader.h @@ -0,0 +1,180 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_SHADER_H +#define ILO_STATE_SHADER_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +/** + * Kernel information. + */ +struct ilo_state_shader_kernel_info { + /* usually 0 unless the shader has multiple kernels */ + uint32_t offset; + + uint8_t grf_start; + uint8_t pcb_attr_count; + + uint32_t scratch_size; +}; + +/** + * Shader resources. + */ +struct ilo_state_shader_resource_info { + /* for prefetches */ + uint8_t sampler_count; + uint8_t surface_count; + + bool has_uav; +}; + +/** + * URB inputs/outputs. 
+ */ +struct ilo_state_shader_urb_info { + uint8_t cv_input_attr_count; + + uint8_t read_base; + uint8_t read_count; + + uint8_t output_attr_count; + + uint8_t user_cull_enables; + uint8_t user_clip_enables; +}; + +struct ilo_state_vs_info { + struct ilo_state_shader_kernel_info kernel; + struct ilo_state_shader_resource_info resource; + struct ilo_state_shader_urb_info urb; + + bool dispatch_enable; + bool stats_enable; +}; + +struct ilo_state_hs_info { + struct ilo_state_shader_kernel_info kernel; + struct ilo_state_shader_resource_info resource; + struct ilo_state_shader_urb_info urb; + + bool dispatch_enable; + bool stats_enable; +}; + +struct ilo_state_ds_info { + struct ilo_state_shader_kernel_info kernel; + struct ilo_state_shader_resource_info resource; + struct ilo_state_shader_urb_info urb; + + bool dispatch_enable; + bool stats_enable; +}; + +/** + * Stream output. Must be consistent with ilo_state_sol_info. + */ +struct ilo_state_gs_sol_info { + bool sol_enable; + bool stats_enable; + bool render_disable; + + uint16_t svbi_post_inc; + + enum gen_reorder_mode tristrip_reorder; +}; + +struct ilo_state_gs_info { + struct ilo_state_shader_kernel_info kernel; + struct ilo_state_shader_resource_info resource; + struct ilo_state_shader_urb_info urb; + + struct ilo_state_gs_sol_info sol; + + bool dispatch_enable; + bool stats_enable; +}; + +struct ilo_state_vs { + uint32_t vs[5]; +}; + +struct ilo_state_hs { + uint32_t hs[4]; +}; + +struct ilo_state_ds { + uint32_t te[3]; + uint32_t ds[5]; +}; + +struct ilo_state_gs { + uint32_t gs[5]; +}; + +bool +ilo_state_vs_init(struct ilo_state_vs *vs, + const struct ilo_dev *dev, + const struct ilo_state_vs_info *info); + +bool +ilo_state_vs_init_disabled(struct ilo_state_vs *vs, + const struct ilo_dev *dev); + +bool +ilo_state_hs_init(struct ilo_state_hs *hs, + const struct ilo_dev *dev, + const struct ilo_state_hs_info *info); + +bool +ilo_state_hs_init_disabled(struct ilo_state_hs *hs, + const struct ilo_dev *dev); + + 
+bool +ilo_state_ds_init(struct ilo_state_ds *ds, + const struct ilo_dev *dev, + const struct ilo_state_ds_info *info); + +bool +ilo_state_ds_init_disabled(struct ilo_state_ds *ds, + const struct ilo_dev *dev); + +bool +ilo_state_gs_init(struct ilo_state_gs *gs, + const struct ilo_dev *dev, + const struct ilo_state_gs_info *info); + +bool +ilo_state_gs_init_disabled(struct ilo_state_gs *gs, + const struct ilo_dev *dev); + +#endif /* ILO_STATE_SHADER_H */ From 6dad848d1acfe781c735120c3db97f1a2f0c28fa Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 12 Jun 2015 14:56:56 +0800 Subject: [PATCH 619/834] ilo: add ilo_state_ps We want to make ilo_shader_cso a union of ilo_state_{vs,hs,ds,gs,ps}. --- src/gallium/drivers/ilo/Makefile.sources | 1 + .../drivers/ilo/core/ilo_state_shader.h | 76 ++ .../drivers/ilo/core/ilo_state_shader_ps.c | 771 ++++++++++++++++++ 3 files changed, 848 insertions(+) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_shader_ps.c diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 2a630779848..3b38277af19 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -32,6 +32,7 @@ C_SOURCES := \ core/ilo_state_sbe.c \ core/ilo_state_sbe.h \ core/ilo_state_shader.c \ + core/ilo_state_shader_ps.c \ core/ilo_state_shader.h \ core/ilo_state_sol.c \ core/ilo_state_sol.h \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.h b/src/gallium/drivers/ilo/core/ilo_state_shader.h index c1dbf23dfb3..44690c5b0bb 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_shader.h +++ b/src/gallium/drivers/ilo/core/ilo_state_shader.h @@ -123,6 +123,54 @@ struct ilo_state_gs_info { bool stats_enable; }; +struct ilo_state_ps_io_info { + /* inputs */ + enum gen_position_offset posoffset; + uint8_t attr_count; + bool use_z; + bool use_w; + bool use_coverage_mask; + + /* outputs */ + enum gen_pscdepth_mode pscdepth; + bool has_rt_write; + bool write_pixel_mask; + 
bool write_omask; +}; + +struct ilo_state_ps_params_info { + /* compatibility with raster states */ + uint32_t sample_mask; + bool earlyz_control_psexec; + + /* compatibility with cc states */ + bool alpha_may_kill; + bool dual_source_blending; + bool has_writeable_rt; +}; + +struct ilo_state_ps_info { + struct ilo_state_shader_kernel_info kernel_8; + struct ilo_state_shader_kernel_info kernel_16; + struct ilo_state_shader_kernel_info kernel_32; + struct ilo_state_shader_resource_info resource; + + struct ilo_state_ps_io_info io; + struct ilo_state_ps_params_info params; + + /* bitmask of GEN6_PS_DISPATCH_x */ + uint8_t valid_kernels; + bool per_sample_dispatch; + bool sample_count_one; + bool cv_per_sample_interp; + bool cv_has_earlyz_op; + + bool rt_clear_enable; + bool rt_resolve_enable; + + bool cv_has_depth_buffer; +}; + struct ilo_state_vs { uint32_t vs[5]; }; @@ -140,6 +188,20 @@ struct ilo_state_gs { uint32_t gs[5]; }; +struct ilo_state_ps { + uint32_t ps[8]; + + struct ilo_state_ps_dispatch_conds { + bool ps_valid; + + bool has_rt_write; + bool write_odepth; + bool write_ostencil; + bool has_uav_write; + bool ps_may_kill; + } conds; +}; + bool ilo_state_vs_init(struct ilo_state_vs *vs, const struct ilo_dev *dev, @@ -177,4 +239,18 @@ bool ilo_state_gs_init_disabled(struct ilo_state_gs *gs, const struct ilo_dev *dev); +bool +ilo_state_ps_init(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info); + +bool +ilo_state_ps_init_disabled(struct ilo_state_ps *ps, + const struct ilo_dev *dev); + +bool +ilo_state_ps_set_params(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_params_info *params); + #endif /* ILO_STATE_SHADER_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c new file mode 100644 index 00000000000..f4d801e9b56 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c @@ -0,0 +1,771 @@ +/* + * Mesa 
3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_shader.h" + +struct pixel_ff { + uint8_t dispatch_modes; + + uint32_t kernel_offsets[3]; + uint8_t grf_starts[3]; + bool pcb_enable; + uint8_t scratch_space; + + uint8_t sampler_count; + uint8_t surface_count; + bool has_uav; + + uint16_t thread_count; + + struct ilo_state_ps_dispatch_conds conds; + + bool kill_pixel; + bool dispatch_enable; + bool dual_source_blending; + uint32_t sample_mask; +}; + +static bool +ps_kernel_validate_gen6(const struct ilo_dev *dev, + const struct ilo_state_shader_kernel_info *kernel) +{ + /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */ + const uint8_t max_grf_start = 128; + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 271: + * + * "(Per-Thread Scratch Space) + * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two" + */ + const uint32_t max_scratch_size = 2 * 1024 * 1024; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* "Kernel Start Pointer" is 64-byte aligned */ + assert(kernel->offset % 64 == 0); + + assert(kernel->grf_start < max_grf_start); + assert(kernel->scratch_size <= max_scratch_size); + + return true; +} + +static bool +ps_validate_gen6(const struct ilo_dev *dev, + const struct ilo_state_ps_info *info) +{ + const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8; + const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16; + const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32; + const struct ilo_state_ps_io_info *io = &info->io; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!ps_kernel_validate_gen6(dev, kernel_8) || + !ps_kernel_validate_gen6(dev, kernel_16) || + !ps_kernel_validate_gen6(dev, kernel_32)) + return false; + + /* unsupported on Gen6 */ + if (ilo_dev_gen(dev) == ILO_GEN(6)) + assert(!io->use_coverage_mask); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 275: + * + * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth + * field must 
be set to disabled." + */ + if (ilo_dev_gen(dev) == ILO_GEN(6) && io->pscdepth != GEN7_PSCDEPTH_OFF) + assert(info->cv_has_depth_buffer); + + if (!info->per_sample_dispatch) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 281: + * + * "MSDISPMODE_PERSAMPLE is required in order to select + * POSOFFSET_SAMPLE." + */ + assert(io->posoffset != GEN6_POSOFFSET_SAMPLE); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 282: + * + * "MSDISPMODE_PERSAMPLE is required in order to select + * INTERP_SAMPLE." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 283: + * + * "MSDISPMODE_PERSAMPLE is required in order to select Perspective + * Sample or Non-perspective Sample barycentric coordinates." + */ + assert(!info->cv_per_sample_interp); + } + + /* + * + * From the Sandy Bridge PRM, volume 2 part 1, page 314: + * + * "Pixel Shader Dispatch, Alpha... must all be disabled." + * + * Simply disallow any valid kernel when there is early-z op. Also, when + * there is no valid kernel, io should be zeroed. + */ + if (info->valid_kernels) + assert(!info->cv_has_earlyz_op); + else + assert(ilo_is_zeroed(io, sizeof(*io))); + + return true; +} + +static uint8_t +ps_get_gen6_dispatch_modes(const struct ilo_dev *dev, + const struct ilo_state_ps_info *info) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint8_t dispatch_modes = info->valid_kernels; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!dispatch_modes) + return 0; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 334: + * + * "Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader + * computed depth." + * + * "Valid on all products, except when in non-1x PERSAMPLE mode + * (applies to [DevSNB+] only)" + * + * From the Sandy Bridge PRM, volume 4 part 1, page 239: + * + * "[DevSNB]: When Pixel Shader outputs oDepth and PS invocation mode + * is PERPIXEL, Message Type for Render Target Write must be SIMD8. 
+ * + * Errata: [DevSNB+]: When Pixel Shader outputs oMask, this message + * type is not supported: SIMD8 (including SIMD8_DUALSRC_xx)." + * + * It is really hard to follow what combinations are valid on what + * platforms. Judging from the restrictions on RT write messages on Gen6, + * oDepth and oMask related issues should be Gen6-specific. PERSAMPLE + * issue should be universal, and disallows multiple dispatch modes. + */ + if (ilo_dev_gen(dev) == ILO_GEN(6)) { + if (io->pscdepth != GEN7_PSCDEPTH_OFF && !info->per_sample_dispatch) + dispatch_modes &= GEN6_PS_DISPATCH_8; + if (io->write_omask) + dispatch_modes &= ~GEN6_PS_DISPATCH_8; + } + if (info->per_sample_dispatch && !info->sample_count_one) { + /* prefer 32 over 16 over 8 */ + if (dispatch_modes & GEN6_PS_DISPATCH_32) + dispatch_modes &= GEN6_PS_DISPATCH_32; + else if (dispatch_modes & GEN6_PS_DISPATCH_16) + dispatch_modes &= GEN6_PS_DISPATCH_16; + else + dispatch_modes &= GEN6_PS_DISPATCH_8; + } + + /* + * From the Broadwell PRM, volume 2b, page 149: + * + * "When Render Target Fast Clear Enable is ENABLED or Render Target + * Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit (8 Pixel + * Dispatch or Dual-8 Pixel Dispatch Enable) must be DISABLED." + */ + if (info->rt_clear_enable || info->rt_resolve_enable) + dispatch_modes &= ~GEN6_PS_DISPATCH_8; + + assert(dispatch_modes); + + return dispatch_modes; +} + +static uint16_t +ps_get_gen6_thread_count(const struct ilo_dev *dev, + const struct ilo_state_ps_info *info) +{ + uint16_t thread_count; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* Maximum Number of Threads of 3DSTATE_PS */ + switch (ilo_dev_gen(dev)) { + case ILO_GEN(8): + /* scaled automatically */ + thread_count = 64 - 1; + break; + case ILO_GEN(7.5): + thread_count = (dev->gt == 3) ? 408 : + (dev->gt == 2) ? 204 : 102; + break; + case ILO_GEN(7): + thread_count = (dev->gt == 2) ? 
172 : 48; + break; + case ILO_GEN(6): + default: + /* from the classic driver instead of the PRM */ + thread_count = (dev->gt == 2) ? 80 : 40; + break; + } + + return thread_count - 1; +} + +static bool +ps_params_get_gen6_kill_pixel(const struct ilo_dev *dev, + const struct ilo_state_ps_params_info *params, + const struct ilo_state_ps_dispatch_conds *conds) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 275: + * + * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the + * PS kernel or color calculator has the ability to kill (discard) + * pixels or samples, other than due to depth or stencil testing. + * This bit is required to be ENABLED in the following situations: + * + * The API pixel shader program contains "killpix" or "discard" + * instructions, or other code in the pixel shader kernel that can + * cause the final pixel mask to differ from the pixel mask received + * on dispatch. + * + * A sampler with chroma key enabled with kill pixel mode is used by + * the pixel shader. + * + * Any render target has Alpha Test Enable or AlphaToCoverage Enable + * enabled. + * + * The pixel shader kernel generates and outputs oMask. + * + * Note: As ClipDistance clipping is fully supported in hardware and + * therefore not via PS instructions, there should be no need to + * ENABLE this bit due to ClipDistance clipping." + */ + return (conds->ps_may_kill || params->alpha_may_kill); +} + +static bool +ps_params_get_gen6_dispatch_enable(const struct ilo_dev *dev, + const struct ilo_state_ps_params_info *params, + const struct ilo_state_ps_dispatch_conds *conds) +{ + /* + * We want to skip dispatching when EarlyZ suffices. 
The conditions that + * require dispatching are + * + * - PS writes RTs and RTs are writeable + * - PS changes depth value and depth test/write is enabled + * - PS changes stencil value and stencil test is enabled + * - PS writes UAVs + * - PS or CC kills pixels + * - EDSC is PSEXEC, and depth test/write or stencil test is enabled + */ + bool dispatch_required = + ((conds->has_rt_write && params->has_writeable_rt) || + conds->write_odepth || + conds->write_ostencil || + conds->has_uav_write || + ps_params_get_gen6_kill_pixel(dev, params, conds) || + params->earlyz_control_psexec); + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 280: + * + * "If EDSC_PSEXEC mode is selected, Thread Dispatch Enable must be + * set." + */ + if (ilo_dev_gen(dev) < ILO_GEN(8) && params->earlyz_control_psexec) + dispatch_required = true; + + /* assert it is valid to dispatch */ + if (dispatch_required) + assert(conds->ps_valid); + + return dispatch_required; +} + +static bool +ps_get_gen6_ff_kernels(const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + struct pixel_ff *ff) +{ + const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8; + const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16; + const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32; + uint32_t scratch_size; + + ILO_DEV_ASSERT(dev, 6, 8); + + ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info); + + /* initialize kernel offsets and GRF starts */ + if (util_is_power_of_two(ff->dispatch_modes)) { + if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) { + ff->kernel_offsets[0] = kernel_8->offset; + ff->grf_starts[0] = kernel_8->grf_start; + } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) { + ff->kernel_offsets[0] = kernel_16->offset; + ff->grf_starts[0] = kernel_16->grf_start; + } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) { + ff->kernel_offsets[0] = kernel_32->offset; + ff->grf_starts[0] = kernel_32->grf_start; + 
} + } else { + ff->kernel_offsets[0] = kernel_8->offset; + ff->kernel_offsets[1] = kernel_32->offset; + ff->kernel_offsets[2] = kernel_16->offset; + + ff->grf_starts[0] = kernel_8->grf_start; + ff->grf_starts[1] = kernel_32->grf_start; + ff->grf_starts[2] = kernel_16->grf_start; + } + + /* we do not want to save it */ + assert(ff->kernel_offsets[0] == 0); + + ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) && + kernel_8->pcb_attr_count) || + ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) && + kernel_16->pcb_attr_count) || + ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) && + kernel_32->pcb_attr_count)); + + scratch_size = 0; + if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) && + scratch_size < kernel_8->scratch_size) + scratch_size = kernel_8->scratch_size; + if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) && + scratch_size < kernel_16->scratch_size) + scratch_size = kernel_16->scratch_size; + if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) && + scratch_size < kernel_32->scratch_size) + scratch_size = kernel_32->scratch_size; + + /* next power of two, starting from 1KB */ + ff->scratch_space = (scratch_size > 1024) ? + (util_last_bit(scratch_size - 1) - 10): 0; + + /* GPU hangs on Haswell if none of the dispatch mode bits is set */ + if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes) + ff->dispatch_modes |= GEN6_PS_DISPATCH_8; + + return true; +} + +static bool +ps_get_gen6_ff(const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + struct pixel_ff *ff) +{ + const struct ilo_state_shader_resource_info *resource = &info->resource; + const struct ilo_state_ps_io_info *io = &info->io; + const struct ilo_state_ps_params_info *params = &info->params; + + ILO_DEV_ASSERT(dev, 6, 8); + + memset(ff, 0, sizeof(*ff)); + + if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff)) + return false; + + ff->sampler_count = (resource->sampler_count <= 12) ? 
+ (resource->sampler_count + 3) / 4 : 4; + ff->surface_count = resource->surface_count; + ff->has_uav = resource->has_uav; + + ff->thread_count = ps_get_gen6_thread_count(dev, info); + + ff->conds.ps_valid = (info->valid_kernels != 0x0); + ff->conds.has_rt_write = io->has_rt_write; + ff->conds.write_odepth = (io->pscdepth != GEN7_PSCDEPTH_OFF); + ff->conds.write_ostencil = false; + ff->conds.has_uav_write = resource->has_uav; + ff->conds.ps_may_kill = (io->write_pixel_mask || io->write_omask); + + ff->kill_pixel = ps_params_get_gen6_kill_pixel(dev, params, &ff->conds); + ff->dispatch_enable = + ps_params_get_gen6_dispatch_enable(dev, params, &ff->conds); + ff->dual_source_blending = params->dual_source_blending; + ff->sample_mask = params->sample_mask; + + return true; +} + +static bool +ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + const struct pixel_ff *ff) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint32_t dw2, dw3, dw4, dw5, dw6; + + ILO_DEV_ASSERT(dev, 6, 6); + + dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | + ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; + + if (false) + dw2 |= GEN6_THREADDISP_FP_MODE_ALT; + + dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + + dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT | + ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT | + ff->grf_starts[2] << GEN6_WM_DW4_URB_GRF_START2__SHIFT; + + dw5 = ff->thread_count << GEN6_WM_DW5_MAX_THREADS__SHIFT | + ff->dispatch_modes << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT; + + if (ff->kill_pixel) + dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL; + + if (io->pscdepth != GEN7_PSCDEPTH_OFF) + dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH; + if (io->use_z) + dw5 |= GEN6_WM_DW5_PS_USE_DEPTH; + + if (ff->dispatch_enable) + dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; + + if (io->write_omask) + dw5 |= GEN6_WM_DW5_PS_COMPUTE_OMASK; + if (io->use_w) + dw5 |= 
GEN6_WM_DW5_PS_USE_W; + + if (ff->dual_source_blending) + dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; + + dw6 = io->attr_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | + io->posoffset << GEN6_WM_DW6_PS_POSOFFSET__SHIFT; + + dw6 |= (info->per_sample_dispatch) ? + GEN6_WM_DW6_MSDISPMODE_PERSAMPLE : GEN6_WM_DW6_MSDISPMODE_PERPIXEL; + + STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 7); + ps->ps[0] = dw2; + ps->ps[1] = dw3; + ps->ps[2] = dw4; + ps->ps[3] = dw5; + ps->ps[4] = dw6; + ps->ps[5] = ff->kernel_offsets[1]; + ps->ps[6] = ff->kernel_offsets[2]; + + return true; +} + +static bool +ps_set_gen7_3dstate_wm(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + const struct pixel_ff *ff) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint32_t dw1, dw2; + + ILO_DEV_ASSERT(dev, 7, 7.5); + + dw1 = io->pscdepth << GEN7_WM_DW1_PSCDEPTH__SHIFT; + + if (ff->dispatch_enable) + dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE; + if (ff->kill_pixel) + dw1 |= GEN7_WM_DW1_PS_KILL_PIXEL; + + if (io->use_z) + dw1 |= GEN7_WM_DW1_PS_USE_DEPTH; + if (io->use_w) + dw1 |= GEN7_WM_DW1_PS_USE_W; + if (io->use_coverage_mask) + dw1 |= GEN7_WM_DW1_PS_USE_COVERAGE_MASK; + + dw2 = (info->per_sample_dispatch) ? 
+ GEN7_WM_DW2_MSDISPMODE_PERSAMPLE : GEN7_WM_DW2_MSDISPMODE_PERPIXEL; + + STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 2); + ps->ps[0] = dw1; + ps->ps[1] = dw2; + + return true; +} + +static bool +ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + const struct pixel_ff *ff) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint32_t dw2, dw3, dw4, dw5; + + ILO_DEV_ASSERT(dev, 7, 7.5); + + dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | + ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; + + if (false) + dw2 |= GEN6_THREADDISP_FP_MODE_ALT; + + dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + + dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT | + ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; + + if (ilo_dev_gen(dev) == ILO_GEN(7.5)) { + dw4 |= ff->thread_count << GEN75_PS_DW4_MAX_THREADS__SHIFT | + (ff->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; + } else { + dw4 |= ff->thread_count << GEN7_PS_DW4_MAX_THREADS__SHIFT; + } + + if (ff->pcb_enable) + dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE; + if (io->attr_count) + dw4 |= GEN7_PS_DW4_ATTR_ENABLE; + if (io->write_omask) + dw4 |= GEN7_PS_DW4_COMPUTE_OMASK; + if (info->rt_clear_enable) + dw4 |= GEN7_PS_DW4_RT_FAST_CLEAR; + if (ff->dual_source_blending) + dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; + if (info->rt_resolve_enable) + dw4 |= GEN7_PS_DW4_RT_RESOLVE; + if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff->has_uav) + dw4 |= GEN75_PS_DW4_ACCESS_UAV; + + dw5 = ff->grf_starts[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT | + ff->grf_starts[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT | + ff->grf_starts[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 8); + ps->ps[2] = dw2; + ps->ps[3] = dw3; + ps->ps[4] = dw4; + ps->ps[5] = dw5; + ps->ps[6] = ff->kernel_offsets[1]; + ps->ps[7] = ff->kernel_offsets[2]; + + return true; +} + +static bool +ps_set_gen8_3DSTATE_PS(struct 
ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + const struct pixel_ff *ff) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint32_t dw3, dw4, dw6, dw7; + + ILO_DEV_ASSERT(dev, 8, 8); + + dw3 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | + ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; + + if (false) + dw3 |= GEN6_THREADDISP_FP_MODE_ALT; + + dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + + dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT | + io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT | + ff->dispatch_modes << GEN8_PS_DW6_DISPATCH_MODE__SHIFT; + + if (ff->pcb_enable) + dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE; + + if (info->rt_clear_enable) + dw6 |= GEN8_PS_DW6_RT_FAST_CLEAR; + if (info->rt_resolve_enable) + dw6 |= GEN8_PS_DW6_RT_RESOLVE; + + dw7 = ff->grf_starts[0] << GEN8_PS_DW7_URB_GRF_START0__SHIFT | + ff->grf_starts[1] << GEN8_PS_DW7_URB_GRF_START1__SHIFT | + ff->grf_starts[2] << GEN8_PS_DW7_URB_GRF_START2__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 6); + ps->ps[0] = dw3; + ps->ps[1] = dw4; + ps->ps[2] = dw6; + ps->ps[3] = dw7; + ps->ps[4] = ff->kernel_offsets[1]; + ps->ps[5] = ff->kernel_offsets[2]; + + return true; +} + +static bool +ps_set_gen8_3DSTATE_PS_EXTRA(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + const struct pixel_ff *ff) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint32_t dw1; + + ILO_DEV_ASSERT(dev, 8, 8); + + dw1 = io->pscdepth << GEN8_PSX_DW1_PSCDEPTH__SHIFT; + + if (info->valid_kernels) + dw1 |= GEN8_PSX_DW1_VALID; + if (!io->has_rt_write) + dw1 |= GEN8_PSX_DW1_UAV_ONLY; + if (io->write_omask) + dw1 |= GEN8_PSX_DW1_COMPUTE_OMASK; + if (io->write_pixel_mask) + dw1 |= GEN8_PSX_DW1_KILL_PIXEL; + + if (io->use_z) + dw1 |= GEN8_PSX_DW1_USE_DEPTH; + if (io->use_w) + dw1 |= GEN8_PSX_DW1_USE_W; + if (io->attr_count) + dw1 |= GEN8_PSX_DW1_ATTR_ENABLE; + + if 
(info->per_sample_dispatch) + dw1 |= GEN8_PSX_DW1_PER_SAMPLE; + if (ff->has_uav) + dw1 |= GEN8_PSX_DW1_ACCESS_UAV; + if (io->use_coverage_mask) + dw1 |= GEN8_PSX_DW1_USE_COVERAGE_MASK; + + /* + * From the Broadwell PRM, volume 2b, page 151: + * + * "When this bit (Pixel Shader Valid) clear the rest of this command + * should also be clear. + */ + if (!info->valid_kernels) + dw1 = 0; + + STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 5); + ps->ps[4] = dw1; + + return true; +} + +bool +ilo_state_ps_init(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info) +{ + struct pixel_ff ff; + bool ret = true; + + assert(ilo_is_zeroed(ps, sizeof(*ps))); + + ret &= ps_get_gen6_ff(dev, info, &ff); + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + ret &= ps_set_gen8_3DSTATE_PS(ps, dev, info, &ff); + ret &= ps_set_gen8_3DSTATE_PS_EXTRA(ps, dev, info, &ff); + } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + ret &= ps_set_gen7_3dstate_wm(ps, dev, info, &ff); + ret &= ps_set_gen7_3DSTATE_PS(ps, dev, info, &ff); + } else { + ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff); + } + + /* save conditions */ + ps->conds = ff.conds; + + assert(ret); + + return ret; +} + +bool +ilo_state_ps_init_disabled(struct ilo_state_ps *ps, + const struct ilo_dev *dev) +{ + struct ilo_state_ps_info info; + + memset(&info, 0, sizeof(info)); + + return ilo_state_ps_init(ps, dev, &info); +} + +bool +ilo_state_ps_set_params(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_params_info *params) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* modify sample mask */ + if (ilo_dev_gen(dev) == ILO_GEN(7.5)) { + ps->ps[4] = (ps->ps[4] & ~GEN75_PS_DW4_SAMPLE_MASK__MASK) | + (params->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; + } + + /* modify dispatch enable, pixel kill, and dual source blending */ + if (ilo_dev_gen(dev) < ILO_GEN(8)) { + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds)) + ps->ps[0] |= 
GEN7_WM_DW1_PS_DISPATCH_ENABLE; + else + ps->ps[0] &= ~GEN7_WM_DW1_PS_DISPATCH_ENABLE; + + if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds)) + ps->ps[0] |= GEN7_WM_DW1_PS_KILL_PIXEL; + else + ps->ps[0] &= ~GEN7_WM_DW1_PS_KILL_PIXEL; + + if (params->dual_source_blending) + ps->ps[4] |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; + else + ps->ps[4] &= ~GEN7_PS_DW4_DUAL_SOURCE_BLEND; + } else { + if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds)) + ps->ps[3] |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; + else + ps->ps[3] &= ~GEN6_WM_DW5_PS_DISPATCH_ENABLE; + + if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds)) + ps->ps[3] |= GEN6_WM_DW5_PS_KILL_PIXEL; + else + ps->ps[3] &= ~GEN6_WM_DW5_PS_KILL_PIXEL; + + if (params->dual_source_blending) + ps->ps[3] |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; + else + ps->ps[3] &= ~GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; + } + } + + return true; +} From c10c1ac0cfb0ae42742f369d9f3fa2f4fba8639a Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 18 May 2015 23:32:10 +0800 Subject: [PATCH 620/834] ilo: replace ilo_zs_surface with ilo_state_zs --- .../drivers/ilo/core/ilo_builder_3d_bottom.h | 96 +++-- src/gallium/drivers/ilo/core/ilo_image.c | 4 + src/gallium/drivers/ilo/core/ilo_state_3d.h | 20 +- .../drivers/ilo/core/ilo_state_3d_bottom.c | 391 ------------------ src/gallium/drivers/ilo/ilo_render_gen6.c | 10 +- src/gallium/drivers/ilo/ilo_render_gen7.c | 10 +- src/gallium/drivers/ilo/ilo_render_gen8.c | 10 +- src/gallium/drivers/ilo/ilo_state.c | 43 +- 8 files changed, 105 insertions(+), 479 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 093cca12840..c49f4e470e7 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -1159,8 +1159,7 @@ gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder) static inline void gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder, - 
const struct ilo_zs_surface *zs, - bool aligned_8x4) + const struct ilo_state_zs *zs) { const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) : @@ -1174,44 +1173,49 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder, pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = cmd | (cmd_len - 2); - dw[1] = zs->payload[0]; - dw[2] = 0; - /* see ilo_gpe_init_zs_surface() */ + /* + * see zs_set_gen6_3DSTATE_DEPTH_BUFFER() and + * zs_set_gen7_3DSTATE_DEPTH_BUFFER() + */ if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { + dw[1] = zs->depth[0]; + dw[2] = 0; dw[3] = 0; - dw[4] = (aligned_8x4) ? zs->dw_aligned_8x4 : zs->payload[2]; - dw[5] = zs->payload[3]; - dw[6] = zs->payload[4]; - dw[7] = zs->payload[5]; + dw[4] = zs->depth[2]; + dw[5] = zs->depth[3]; + dw[6] = 0; + dw[7] = zs->depth[4]; dw[5] |= builder->mocs << GEN8_DEPTH_DW5_MOCS__SHIFT; - if (zs->bo) { - ilo_builder_batch_reloc64(builder, pos + 2, zs->bo, - zs->payload[1], INTEL_RELOC_WRITE); + if (zs->depth_bo) { + ilo_builder_batch_reloc64(builder, pos + 2, zs->depth_bo, + zs->depth[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE); } } else { - dw[3] = (aligned_8x4) ? zs->dw_aligned_8x4 : zs->payload[2]; - dw[4] = zs->payload[3]; - dw[5] = zs->payload[4]; - dw[6] = zs->payload[5]; + dw[1] = zs->depth[0]; + dw[2] = 0; + dw[3] = zs->depth[2]; + dw[4] = zs->depth[3]; + dw[5] = 0; + dw[6] = zs->depth[4]; if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) dw[4] |= builder->mocs << GEN7_DEPTH_DW4_MOCS__SHIFT; else dw[6] |= builder->mocs << GEN6_DEPTH_DW6_MOCS__SHIFT; - if (zs->bo) { - ilo_builder_batch_reloc(builder, pos + 2, zs->bo, - zs->payload[1], INTEL_RELOC_WRITE); + if (zs->depth_bo) { + ilo_builder_batch_reloc(builder, pos + 2, zs->depth_bo, + zs->depth[1], (zs->z_readonly) ? 
0 : INTEL_RELOC_WRITE); } } } static inline void gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder, - const struct ilo_zs_surface *zs) + const struct ilo_state_zs *zs) { const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) : @@ -1225,33 +1229,36 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder, pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = cmd | (cmd_len - 2); - /* see ilo_gpe_init_zs_surface() */ - dw[1] = zs->payload[6]; - dw[2] = 0; + /* see zs_set_gen6_3DSTATE_STENCIL_BUFFER() */ if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { + dw[1] = zs->stencil[0]; + dw[2] = 0; + dw[3] = 0; + dw[4] = zs->stencil[2]; + dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT; - dw[3] = 0; - dw[4] = zs->payload[8]; - - if (zs->separate_s8_bo) { - ilo_builder_batch_reloc64(builder, pos + 2, - zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE); + if (zs->stencil_bo) { + ilo_builder_batch_reloc64(builder, pos + 2, zs->stencil_bo, + zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE); } } else { + dw[1] = zs->stencil[0]; + dw[2] = 0; + dw[1] |= builder->mocs << GEN6_STENCIL_DW1_MOCS__SHIFT; - if (zs->separate_s8_bo) { - ilo_builder_batch_reloc(builder, pos + 2, - zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE); + if (zs->stencil_bo) { + ilo_builder_batch_reloc(builder, pos + 2, zs->stencil_bo, + zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE); } } } static inline void gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder, - const struct ilo_zs_surface *zs) + const struct ilo_state_zs *zs) { const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 
GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) : @@ -1265,26 +1272,29 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder, pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = cmd | (cmd_len - 2); - /* see ilo_gpe_init_zs_surface() */ - dw[1] = zs->payload[9]; - dw[2] = 0; + /* see zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER() */ if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { + dw[1] = zs->hiz[0]; + dw[2] = 0; + dw[3] = 0; + dw[4] = zs->hiz[2]; + dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT; - dw[3] = 0; - dw[4] = zs->payload[11]; - if (zs->hiz_bo) { - ilo_builder_batch_reloc64(builder, pos + 2, - zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE); + ilo_builder_batch_reloc64(builder, pos + 2, zs->hiz_bo, + zs->hiz[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE); } } else { + dw[1] = zs->hiz[0]; + dw[2] = 0; + dw[1] |= builder->mocs << GEN6_HIZ_DW1_MOCS__SHIFT; if (zs->hiz_bo) { - ilo_builder_batch_reloc(builder, pos + 2, - zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE); + ilo_builder_batch_reloc(builder, pos + 2, zs->hiz_bo, + zs->hiz[1], (zs->z_readonly) ? 
0 : INTEL_RELOC_WRITE); } } } diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c index 631093273bf..0d837d8a9d5 100644 --- a/src/gallium/drivers/ilo/core/ilo_image.c +++ b/src/gallium/drivers/ilo/core/ilo_image.c @@ -797,6 +797,10 @@ img_want_hiz(const struct ilo_image *img, if (ilo_debug & ILO_DEBUG_NOHIZ) return false; + /* we want 8x4 aligned levels */ + if (templ->target == PIPE_TEXTURE_1D) + return false; + if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL)) return false; diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index 819a17d2681..3c7c50de702 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -33,6 +33,7 @@ #include "ilo_core.h" #include "ilo_dev.h" +#include "ilo_state_zs.h" /** * \see brw_context.h @@ -260,14 +261,7 @@ struct ilo_surface_cso { bool is_rt; union { struct ilo_view_surface rt; - struct ilo_zs_surface { - uint32_t payload[12]; - uint32_t dw_aligned_8x4; - - struct intel_bo *bo; - struct intel_bo *hiz_bo; - struct intel_bo *separate_s8_bo; - } zs; + struct ilo_state_zs zs; } u; }; @@ -275,7 +269,7 @@ struct ilo_fb_state { struct pipe_framebuffer_state state; struct ilo_view_surface null_rt; - struct ilo_zs_surface null_zs; + struct ilo_state_zs null_zs; struct ilo_fb_blend_caps { bool can_logicop; @@ -394,14 +388,6 @@ ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev, bool is_rt, struct ilo_view_surface *surf); -void -ilo_gpe_init_zs_surface(const struct ilo_dev *dev, - const struct ilo_image *img, - const struct ilo_image *s8_img, - enum pipe_format format, unsigned level, - unsigned first_layer, unsigned num_layers, - struct ilo_zs_surface *zs); - void ilo_gpe_init_vs_cso(const struct ilo_dev *dev, const struct ilo_shader_state *vs, diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c index 
9d472d93fdc..0ca5106ca55 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c @@ -900,397 +900,6 @@ ilo_gpe_init_fs_cso(const struct ilo_dev *dev, fs_init_cso_gen6(dev, fs, cso); } -struct ilo_zs_surface_info { - int surface_type; - int format; - - struct { - struct intel_bo *bo; - unsigned stride; - unsigned qpitch; - enum gen_surface_tiling tiling; - uint32_t offset; - } zs, stencil, hiz; - - unsigned width, height, depth; - unsigned lod, first_layer, num_layers; -}; - -static void -zs_init_info_null(const struct ilo_dev *dev, - struct ilo_zs_surface_info *info) -{ - ILO_DEV_ASSERT(dev, 6, 8); - - memset(info, 0, sizeof(*info)); - - info->surface_type = GEN6_SURFTYPE_NULL; - info->format = GEN6_ZFORMAT_D32_FLOAT; - info->width = 1; - info->height = 1; - info->depth = 1; - info->num_layers = 1; -} - -static void -zs_init_info(const struct ilo_dev *dev, - const struct ilo_image *img, - const struct ilo_image *s8_img, - enum pipe_format format, unsigned level, - unsigned first_layer, unsigned num_layers, - struct ilo_zs_surface_info *info) -{ - bool separate_stencil; - - ILO_DEV_ASSERT(dev, 6, 8); - - memset(info, 0, sizeof(*info)); - - info->surface_type = ilo_gpe_gen6_translate_texture(img->target); - - if (info->surface_type == GEN6_SURFTYPE_CUBE) { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 325-326: - * - * "For Other Surfaces (Cube Surfaces): - * This field (Minimum Array Element) is ignored." - * - * "For Other Surfaces (Cube Surfaces): - * This field (Render Target View Extent) is ignored." - * - * As such, we cannot set first_layer and num_layers on cube surfaces. - * To work around that, treat it as a 2D surface. 
- */ - info->surface_type = GEN6_SURFTYPE_2D; - } - - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - separate_stencil = true; - } else { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 317: - * - * "This field (Separate Stencil Buffer Enable) must be set to the - * same value (enabled or disabled) as Hierarchical Depth Buffer - * Enable." - */ - separate_stencil = ilo_image_can_enable_aux(img, level); - } - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 317: - * - * "If this field (Hierarchical Depth Buffer Enable) is enabled, the - * Surface Format of the depth buffer cannot be - * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil - * requires the separate stencil buffer." - * - * From the Ironlake PRM, volume 2 part 1, page 330: - * - * "If this field (Separate Stencil Buffer Enable) is disabled, the - * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT." - * - * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT - * is indeed used, the depth values output by the fragment shaders will - * be different when read back. - * - * As for GEN7+, separate_stencil is always true. - */ - switch (format) { - case PIPE_FORMAT_Z16_UNORM: - info->format = GEN6_ZFORMAT_D16_UNORM; - break; - case PIPE_FORMAT_Z32_FLOAT: - info->format = GEN6_ZFORMAT_D32_FLOAT; - break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - info->format = (separate_stencil) ? - GEN6_ZFORMAT_D24_UNORM_X8_UINT : - GEN6_ZFORMAT_D24_UNORM_S8_UINT; - break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - info->format = (separate_stencil) ? 
- GEN6_ZFORMAT_D32_FLOAT : - GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT; - break; - case PIPE_FORMAT_S8_UINT: - if (separate_stencil) { - info->format = GEN6_ZFORMAT_D32_FLOAT; - break; - } - /* fall through */ - default: - assert(!"unsupported depth/stencil format"); - zs_init_info_null(dev, info); - return; - break; - } - - if (format != PIPE_FORMAT_S8_UINT) { - info->zs.bo = img->bo; - info->zs.stride = img->bo_stride; - - assert(img->walk_layer_height % 4 == 0); - info->zs.qpitch = img->walk_layer_height / 4; - - info->zs.tiling = img->tiling; - info->zs.offset = 0; - } - - if (s8_img || format == PIPE_FORMAT_S8_UINT) { - info->stencil.bo = s8_img->bo; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 329: - * - * "The pitch must be set to 2x the value computed based on width, - * as the stencil buffer is stored with two rows interleaved." - * - * For GEN7, we still dobule the stride because we did not double the - * slice widths when initializing the layout. - */ - info->stencil.stride = s8_img->bo_stride * 2; - - assert(s8_img->walk_layer_height % 4 == 0); - info->stencil.qpitch = s8_img->walk_layer_height / 4; - - info->stencil.tiling = s8_img->tiling; - - if (ilo_dev_gen(dev) == ILO_GEN(6)) { - unsigned x, y; - - assert(s8_img->walk == ILO_IMAGE_WALK_LOD); - - /* offset to the level */ - ilo_image_get_slice_pos(s8_img, level, 0, &x, &y); - ilo_image_pos_to_mem(s8_img, x, y, &x, &y); - info->stencil.offset = ilo_image_mem_to_raw(s8_img, x, y); - } - } - - if (ilo_image_can_enable_aux(img, level)) { - info->hiz.bo = img->aux.bo; - info->hiz.stride = img->aux.bo_stride; - - assert(img->aux.walk_layer_height % 4 == 0); - info->hiz.qpitch = img->aux.walk_layer_height / 4; - - info->hiz.tiling = GEN6_TILING_Y; - - /* offset to the level */ - if (ilo_dev_gen(dev) == ILO_GEN(6)) - info->hiz.offset = img->aux.walk_lod_offsets[level]; - } - - info->width = img->width0; - info->height = img->height0; - info->depth = (img->target == PIPE_TEXTURE_3D) ? 
img->depth0 : num_layers; - - info->lod = level; - info->first_layer = first_layer; - info->num_layers = num_layers; -} - -void -ilo_gpe_init_zs_surface(const struct ilo_dev *dev, - const struct ilo_image *img, - const struct ilo_image *s8_img, - enum pipe_format format, unsigned level, - unsigned first_layer, unsigned num_layers, - struct ilo_zs_surface *zs) -{ - const int max_2d_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192; - const int max_array_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512; - struct ilo_zs_surface_info info; - uint32_t dw1, dw2, dw3, dw4, dw5, dw6; - int align_w = 8, align_h = 4; - - ILO_DEV_ASSERT(dev, 6, 8); - - if (img) { - zs_init_info(dev, img, s8_img, format, - level, first_layer, num_layers, &info); - - switch (img->sample_count) { - case 2: - align_w /= 2; - break; - case 4: - align_w /= 2; - align_h /= 2; - break; - case 8: - align_w /= 4; - align_h /= 2; - break; - case 16: - align_w /= 4; - align_h /= 4; - break; - default: - break; - } - } else { - zs_init_info_null(dev, &info); - } - - switch (info.surface_type) { - case GEN6_SURFTYPE_NULL: - break; - case GEN6_SURFTYPE_1D: - assert(info.width <= max_2d_size && info.height == 1 && - info.depth <= max_array_size); - assert(info.first_layer < max_array_size - 1 && - info.num_layers <= max_array_size); - break; - case GEN6_SURFTYPE_2D: - assert(info.width <= max_2d_size && info.height <= max_2d_size && - info.depth <= max_array_size); - assert(info.first_layer < max_array_size - 1 && - info.num_layers <= max_array_size); - break; - case GEN6_SURFTYPE_3D: - assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048); - assert(info.first_layer < 2048 && info.num_layers <= max_array_size); - break; - case GEN6_SURFTYPE_CUBE: - assert(info.width <= max_2d_size && info.height <= max_2d_size && - info.depth == 1); - assert(info.first_layer == 0 && info.num_layers == 1); - assert(info.width == info.height); - break; - default: - assert(!"unexpected depth surface 
type"); - break; - } - - dw1 = info.surface_type << GEN6_DEPTH_DW1_TYPE__SHIFT | - info.format << GEN6_DEPTH_DW1_FORMAT__SHIFT; - - if (info.zs.bo) { - /* required for GEN6+ */ - assert(info.zs.tiling == GEN6_TILING_Y); - assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 && - info.zs.stride % 128 == 0); - assert(info.width <= info.zs.stride); - - dw1 |= (info.zs.stride - 1); - dw2 = info.zs.offset; - } else { - dw2 = 0; - } - - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - if (info.zs.bo) - dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE; - - if (info.stencil.bo) - dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE; - - if (info.hiz.bo) - dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE; - - dw3 = (info.height - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT | - (info.width - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT | - info.lod << GEN7_DEPTH_DW3_LOD__SHIFT; - - zs->dw_aligned_8x4 = - (align(info.height, align_h) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT | - (align(info.width, align_w) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT | - info.lod << GEN7_DEPTH_DW3_LOD__SHIFT; - - dw4 = (info.depth - 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT | - info.first_layer << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT; - - dw5 = 0; - - dw6 = (info.num_layers - 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) - dw6 |= info.zs.qpitch; - } else { - /* always Y-tiled */ - dw1 |= GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT; - - if (info.hiz.bo) { - dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE | - GEN6_DEPTH_DW1_SEPARATE_STENCIL; - } - - dw3 = (info.height - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT | - (info.width - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT | - info.lod << GEN6_DEPTH_DW3_LOD__SHIFT | - GEN6_DEPTH_DW3_MIPLAYOUT_BELOW; - - zs->dw_aligned_8x4 = - (align(info.height, align_h) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT | - (align(info.width, align_w) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT | - info.lod << GEN6_DEPTH_DW3_LOD__SHIFT | - GEN6_DEPTH_DW3_MIPLAYOUT_BELOW; - - dw4 = (info.depth - 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT | - info.first_layer << 
GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT | - (info.num_layers - 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT; - - dw5 = 0; - - dw6 = 0; - } - - STATIC_ASSERT(Elements(zs->payload) >= 12); - - zs->payload[0] = dw1; - zs->payload[1] = dw2; - zs->payload[2] = dw3; - zs->payload[3] = dw4; - zs->payload[4] = dw5; - zs->payload[5] = dw6; - - /* do not increment reference count */ - zs->bo = info.zs.bo; - - /* separate stencil */ - if (info.stencil.bo) { - assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 && - info.stencil.stride % 128 == 0); - - dw1 = (info.stencil.stride - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT; - if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) - dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE; - - dw2 = info.stencil.offset; - dw4 = info.stencil.qpitch; - } else { - dw1 = 0; - dw2 = 0; - dw4 = 0; - } - - zs->payload[6] = dw1; - zs->payload[7] = dw2; - zs->payload[8] = dw4; - /* do not increment reference count */ - zs->separate_s8_bo = info.stencil.bo; - - /* hiz */ - if (info.hiz.bo) { - dw1 = (info.hiz.stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT; - dw2 = info.hiz.offset; - dw4 = info.hiz.qpitch; - } else { - dw1 = 0; - dw2 = 0; - dw4 = 0; - } - - zs->payload[9] = dw1; - zs->payload[10] = dw2; - zs->payload[11] = dw4; - /* do not increment reference count */ - zs->hiz_bo = info.hiz.bo; -} - static void viewport_get_guardband(const struct ilo_dev *dev, int center_x, int center_y, diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 47f711e7956..f3f8ae4a088 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -747,7 +747,7 @@ gen6_draw_wm_depth(struct ilo_render *r, { /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */ if (DIRTY(FB) || r->batch_bo_changed) { - const struct ilo_zs_surface *zs; + const struct ilo_state_zs *zs; uint32_t clear_params; if (vec->fb.state.zsbuf) { @@ -772,7 +772,7 @@ gen6_draw_wm_depth(struct ilo_render *r, 
gen6_wa_pre_depth(r); } - gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false); + gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs); gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs); gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs); gen6_3DSTATE_CLEAR_PARAMS(r->builder, clear_params); @@ -903,10 +903,8 @@ gen6_rectlist_wm_depth(struct ilo_render *r, gen6_wa_pre_depth(r); if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH | - ILO_BLITTER_USE_FB_STENCIL)) { - gen6_3DSTATE_DEPTH_BUFFER(r->builder, - &blitter->fb.dst.u.zs, true); - } + ILO_BLITTER_USE_FB_STENCIL)) + gen6_3DSTATE_DEPTH_BUFFER(r->builder, &blitter->fb.dst.u.zs); if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) { gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 07fe7c83536..88331bf2380 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -569,7 +569,7 @@ gen7_draw_wm(struct ilo_render *r, /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */ if (DIRTY(FB) || r->batch_bo_changed) { - const struct ilo_zs_surface *zs; + const struct ilo_state_zs *zs; uint32_t clear_params; if (vec->fb.state.zsbuf) { @@ -588,7 +588,7 @@ gen7_draw_wm(struct ilo_render *r, clear_params = 0; } - gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false); + gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs); gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs); gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs); gen7_3DSTATE_CLEAR_PARAMS(r->builder, clear_params); @@ -766,10 +766,8 @@ gen7_rectlist_wm_depth(struct ilo_render *r, gen7_wa_pre_depth(r); if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH | - ILO_BLITTER_USE_FB_STENCIL)) { - gen6_3DSTATE_DEPTH_BUFFER(r->builder, - &blitter->fb.dst.u.zs, true); - } + ILO_BLITTER_USE_FB_STENCIL)) + gen6_3DSTATE_DEPTH_BUFFER(r->builder, &blitter->fb.dst.u.zs); if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) { gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c 
b/src/gallium/drivers/ilo/ilo_render_gen8.c index 715b93611f1..994d38b0d05 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -149,7 +149,7 @@ gen8_draw_wm(struct ilo_render *r, /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */ if (DIRTY(FB) || r->batch_bo_changed) { - const struct ilo_zs_surface *zs; + const struct ilo_state_zs *zs; uint32_t clear_params; if (vec->fb.state.zsbuf) { @@ -170,7 +170,7 @@ gen8_draw_wm(struct ilo_render *r, gen8_wa_pre_depth(r); - gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false); + gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs); gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs); gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs); gen7_3DSTATE_CLEAR_PARAMS(r->builder, clear_params); @@ -372,10 +372,8 @@ ilo_render_emit_rectlist_commands_gen8(struct ilo_render *r, gen8_wa_pre_depth(r); if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH | - ILO_BLITTER_USE_FB_STENCIL)) { - gen6_3DSTATE_DEPTH_BUFFER(r->builder, - &blitter->fb.dst.u.zs, true); - } + ILO_BLITTER_USE_FB_STENCIL)) + gen6_3DSTATE_DEPTH_BUFFER(r->builder, &blitter->fb.dst.u.zs); if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) { gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 3865e15bfb1..4e298254ce8 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -1047,7 +1047,7 @@ ilo_create_surface(struct pipe_context *pipe, struct ilo_texture *tex = ilo_texture(res); struct ilo_surface_cso *surf; - surf = MALLOC_STRUCT(ilo_surface_cso); + surf = CALLOC_STRUCT(ilo_surface_cso); assert(surf); surf->base = *templ; @@ -1071,14 +1071,37 @@ ilo_create_surface(struct pipe_context *pipe, templ->u.tex.last_layer - templ->u.tex.first_layer + 1, true, &surf->u.rt); } else { + struct ilo_state_zs_info info; + assert(res->target != PIPE_BUFFER); - ilo_gpe_init_zs_surface(dev, &tex->image, - (tex->separate_s8) ? 
&tex->separate_s8->image : NULL, - templ->format, - templ->u.tex.level, templ->u.tex.first_layer, - templ->u.tex.last_layer - templ->u.tex.first_layer + 1, - &surf->u.zs); + memset(&info, 0, sizeof(info)); + + if (templ->format == PIPE_FORMAT_S8_UINT) { + info.s_img = &tex->image; + } else { + info.z_img = &tex->image; + info.s_img = (tex->separate_s8) ? &tex->separate_s8->image : NULL; + + info.hiz_enable = + ilo_image_can_enable_aux(&tex->image, templ->u.tex.level); + } + + info.level = templ->u.tex.level; + info.slice_base = templ->u.tex.first_layer; + info.slice_count = templ->u.tex.last_layer - + templ->u.tex.first_layer + 1; + + ilo_state_zs_init(&surf->u.zs, dev, &info); + + if (info.z_img) { + surf->u.zs.depth_bo = info.z_img->bo; + if (info.hiz_enable) + surf->u.zs.hiz_bo = info.z_img->aux.bo; + } + + if (info.s_img) + surf->u.zs.stencil_bo = info.s_img->bo; } return &surf->base; @@ -1290,8 +1313,7 @@ ilo_state_vector_init(const struct ilo_dev *dev, { ilo_gpe_set_scissor_null(dev, &vec->scissor); - ilo_gpe_init_zs_surface(dev, NULL, NULL, - PIPE_FORMAT_NONE, 0, 0, 1, &vec->fb.null_zs); + ilo_state_zs_init_for_null(&vec->fb.null_zs, dev); util_dynarray_init(&vec->global_binding.bindings); @@ -1450,7 +1472,8 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec, struct ilo_surface_cso *cso = (struct ilo_surface_cso *) vec->fb.state.zsbuf; - cso->u.rt.bo = bo; + cso->u.zs.depth_bo = bo; + states |= ILO_DIRTY_FB; } } From 745ef2c07b23e1cf227eb26871fc464198b956e8 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 20 May 2015 21:44:30 +0800 Subject: [PATCH 621/834] ilo: replace ilo_view_surface with ilo_state_surface --- .../drivers/ilo/core/ilo_builder_3d_top.h | 48 +- src/gallium/drivers/ilo/core/ilo_state_3d.h | 99 +-- .../drivers/ilo/core/ilo_state_3d_bottom.c | 5 - .../drivers/ilo/core/ilo_state_3d_top.c | 832 ------------------ src/gallium/drivers/ilo/ilo_render_dynamic.c | 16 +- src/gallium/drivers/ilo/ilo_render_surface.c | 64 +- 
src/gallium/drivers/ilo/ilo_screen.c | 2 +- src/gallium/drivers/ilo/ilo_state.c | 141 +-- src/gallium/drivers/ilo/ilo_state.h | 35 + 9 files changed, 206 insertions(+), 1036 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index cfa0e441855..cec1043b204 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -1627,8 +1627,7 @@ gen6_BINDING_TABLE_STATE(struct ilo_builder *builder, static inline uint32_t gen6_SURFACE_STATE(struct ilo_builder *builder, - const struct ilo_view_surface *surf, - bool for_render) + const struct ilo_state_surface *surf) { int state_align, state_len; uint32_t state_offset, *dw; @@ -1641,7 +1640,7 @@ gen6_SURFACE_STATE(struct ilo_builder *builder, state_offset = ilo_builder_surface_pointer(builder, ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw); - memcpy(dw, surf->payload, state_len << 2); + memcpy(dw, surf->surface, state_len << 2); if (surf->bo) { const uint32_t mocs = (surf->scanout) ? @@ -1650,7 +1649,7 @@ gen6_SURFACE_STATE(struct ilo_builder *builder, dw[1] |= mocs << GEN8_SURFACE_DW1_MOCS__SHIFT; ilo_builder_surface_reloc64(builder, state_offset, 8, surf->bo, - surf->payload[8], (for_render) ? INTEL_RELOC_WRITE : 0); + surf->surface[8], (surf->readonly) ? 0 : INTEL_RELOC_WRITE); } } else { state_align = 32; @@ -1658,7 +1657,7 @@ gen6_SURFACE_STATE(struct ilo_builder *builder, state_offset = ilo_builder_surface_pointer(builder, ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw); - memcpy(dw, surf->payload, state_len << 2); + memcpy(dw, surf->surface, state_len << 2); if (surf->bo) { /* @@ -1668,7 +1667,7 @@ gen6_SURFACE_STATE(struct ilo_builder *builder, dw[5] |= builder->mocs << GEN6_SURFACE_DW5_MOCS__SHIFT; ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo, - surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0); + surf->surface[1], (surf->readonly) ? 
0 : INTEL_RELOC_WRITE); } } @@ -1682,38 +1681,49 @@ gen6_so_SURFACE_STATE(struct ilo_builder *builder, int so_index) { struct ilo_buffer *buf = ilo_buffer(so->buffer); - unsigned bo_offset, struct_size; - enum pipe_format elem_format; - struct ilo_view_surface surf; + struct ilo_state_surface_buffer_info info; + struct ilo_state_surface surf; ILO_DEV_ASSERT(builder->dev, 6, 6); - bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; - struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; + memset(&info, 0, sizeof(info)); + info.buf = buf; + info.access = ILO_STATE_SURFACE_ACCESS_DP_SVB; switch (so_info->output[so_index].num_components) { case 1: - elem_format = PIPE_FORMAT_R32_FLOAT; + info.format = GEN6_FORMAT_R32_FLOAT; + info.format_size = 4; break; case 2: - elem_format = PIPE_FORMAT_R32G32_FLOAT; + info.format = GEN6_FORMAT_R32G32_FLOAT; + info.format_size = 8; break; case 3: - elem_format = PIPE_FORMAT_R32G32B32_FLOAT; + info.format = GEN6_FORMAT_R32G32B32_FLOAT; + info.format_size = 12; break; case 4: - elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + info.format = GEN6_FORMAT_R32G32B32A32_FLOAT; + info.format_size = 16; break; default: assert(!"unexpected SO components length"); - elem_format = PIPE_FORMAT_R32_FLOAT; + info.format = GEN6_FORMAT_R32_FLOAT; + info.format_size = 4; break; } - ilo_gpe_init_view_surface_for_buffer(builder->dev, buf, bo_offset, - so->buffer_size, struct_size, elem_format, false, &surf); + info.struct_size = + so_info->stride[so_info->output[so_index].output_buffer] * 4; + info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; + info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4; - return gen6_SURFACE_STATE(builder, &surf, false); + memset(&surf, 0, sizeof(surf)); + ilo_state_surface_init_for_buffer(&surf, builder->dev, &info); + surf.bo = info.buf->bo; + + return gen6_SURFACE_STATE(builder, &surf); } static inline uint32_t diff --git 
a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index 3c7c50de702..6518c442249 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -33,6 +33,7 @@ #include "ilo_core.h" #include "ilo_dev.h" +#include "ilo_state_surface.h" #include "ilo_state_zs.h" /** @@ -214,53 +215,12 @@ struct ilo_sampler_state { const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS]; }; -struct ilo_view_surface { - /* SURFACE_STATE */ - uint32_t payload[13]; - struct intel_bo *bo; - - uint32_t scanout; -}; - -struct ilo_view_cso { - struct pipe_sampler_view base; - - struct ilo_view_surface surface; -}; - -struct ilo_view_state { - struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS]; - unsigned count; -}; - -struct ilo_cbuf_cso { - struct pipe_resource *resource; - struct ilo_view_surface surface; - - /* - * this CSO is not so constant because user buffer needs to be uploaded in - * finalize_constant_buffers() - */ - const void *user_buffer; - unsigned user_buffer_size; -}; - -struct ilo_cbuf_state { - struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS]; - uint32_t enabled_mask; -}; - -struct ilo_resource_state { - struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES]; - unsigned count; -}; - struct ilo_surface_cso { struct pipe_surface base; bool is_rt; union { - struct ilo_view_surface rt; + struct ilo_state_surface rt; struct ilo_state_zs zs; } u; }; @@ -268,7 +228,7 @@ struct ilo_surface_cso { struct ilo_fb_state { struct pipe_framebuffer_state state; - struct ilo_view_surface null_rt; + struct ilo_state_surface null_rt; struct ilo_state_zs null_zs; struct ilo_fb_blend_caps { @@ -285,33 +245,6 @@ struct ilo_shader_cso { uint32_t payload[5]; }; -/** - * Translate a pipe texture target to the matching hardware surface type. 
- */ -static inline int -ilo_gpe_gen6_translate_texture(enum pipe_texture_target target) -{ - switch (target) { - case PIPE_BUFFER: - return GEN6_SURFTYPE_BUFFER; - case PIPE_TEXTURE_1D: - case PIPE_TEXTURE_1D_ARRAY: - return GEN6_SURFTYPE_1D; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_2D_ARRAY: - return GEN6_SURFTYPE_2D; - case PIPE_TEXTURE_3D: - return GEN6_SURFTYPE_3D; - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - return GEN6_SURFTYPE_CUBE; - default: - assert(!"unknown texture target"); - return GEN6_SURFTYPE_BUFFER; - } -} - void ilo_gpe_init_ve(const struct ilo_dev *dev, unsigned num_states, @@ -362,32 +295,6 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev *dev, const struct pipe_sampler_state *state, struct ilo_sampler_cso *sampler); -void -ilo_gpe_init_view_surface_null(const struct ilo_dev *dev, - unsigned width, unsigned height, - unsigned depth, unsigned level, - struct ilo_view_surface *surf); - -void -ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, - const struct ilo_buffer *buf, - unsigned offset, unsigned size, - unsigned struct_size, - enum pipe_format elem_format, - bool is_rt, - struct ilo_view_surface *surf); - -void -ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev, - const struct ilo_image *img, - enum pipe_format format, - unsigned first_level, - unsigned num_levels, - unsigned first_layer, - unsigned num_layers, - bool is_rt, - struct ilo_view_surface *surf); - void ilo_gpe_init_vs_cso(const struct ilo_dev *dev, const struct ilo_shader_state *vs, diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c index 0ca5106ca55..31d9a203c5a 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c @@ -1799,11 +1799,6 @@ ilo_gpe_set_fb(const struct ilo_dev *dev, util_copy_framebuffer_state(&fb->state, state); - ilo_gpe_init_view_surface_null(dev, - 
(state->width) ? state->width : 1, - (state->height) ? state->height : 1, - 1, 0, &fb->null_rt); - for (i = 0; i < state->nr_cbufs; i++) { if (state->cbufs[i]) { fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]); diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c index 4c06c91da90..8f3133e1345 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c @@ -409,838 +409,6 @@ ilo_gpe_init_gs_cso(const struct ilo_dev *dev, gs_init_cso_gen6(dev, gs, cso); } -static void -view_init_null_gen6(const struct ilo_dev *dev, - unsigned width, unsigned height, - unsigned depth, unsigned level, - struct ilo_view_surface *surf) -{ - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 6, 6); - - assert(width >= 1 && height >= 1 && depth >= 1); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 71: - * - * "A null surface will be used in instances where an actual surface is - * not bound. When a write message is generated to a null surface, no - * actual surface is written to. When a read message (including any - * sampling engine message) is generated to a null surface, the result - * is all zeros. Note that a null surface type is allowed to be used - * with all messages, even if it is not specificially indicated as - * supported. All of the remaining fields in surface state are ignored - * for null surfaces, with the following exceptions: - * - * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the - * depth buffer's corresponding state for all render target - * surfaces, including null. - * * Surface Format must be R8G8B8A8_UNORM." 
- * - * From the Sandy Bridge PRM, volume 4 part 1, page 82: - * - * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be - * true" - */ - - STATIC_ASSERT(Elements(surf->payload) >= 6); - dw = surf->payload; - - dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT | - GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT; - - dw[1] = 0; - - dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | - (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | - level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; - - dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | - GEN6_TILING_X; - - dw[4] = 0; - dw[5] = 0; -} - -static void -view_init_for_buffer_gen6(const struct ilo_dev *dev, - const struct ilo_buffer *buf, - unsigned offset, unsigned size, - unsigned struct_size, - enum pipe_format elem_format, - bool is_rt, struct ilo_view_surface *surf) -{ - const int elem_size = util_format_get_blocksize(elem_format); - int width, height, depth, pitch; - int surface_format, num_entries; - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 6, 6); - - /* - * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a - * structure in a buffer. - */ - - surface_format = ilo_format_translate_color(dev, elem_format); - - num_entries = size / struct_size; - /* see if there is enough space to fit another element */ - if (size % struct_size >= elem_size) - num_entries++; - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 76: - * - * "For SURFTYPE_BUFFER render targets, this field (Surface Base - * Address) specifies the base address of first element of the - * surface. The surface is interpreted as a simple array of that - * single element type. The address must be naturally-aligned to the - * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements - * must be 16-byte aligned). 
- * - * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies - * the base address of the first element of the surface, computed in - * software by adding the surface base address to the byte offset of - * the element in the buffer." - */ - if (is_rt) - assert(offset % elem_size == 0); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 77: - * - * "For buffer surfaces, the number of entries in the buffer ranges - * from 1 to 2^27." - */ - assert(num_entries >= 1 && num_entries <= 1 << 27); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 81: - * - * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch) - * indicates the size of the structure." - */ - pitch = struct_size; - - pitch--; - num_entries--; - /* bits [6:0] */ - width = (num_entries & 0x0000007f); - /* bits [19:7] */ - height = (num_entries & 0x000fff80) >> 7; - /* bits [26:20] */ - depth = (num_entries & 0x07f00000) >> 20; - - STATIC_ASSERT(Elements(surf->payload) >= 6); - dw = surf->payload; - - dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT; - - dw[1] = offset; - - dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT | - width << GEN6_SURFACE_DW2_WIDTH__SHIFT; - - dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT | - pitch << GEN6_SURFACE_DW3_PITCH__SHIFT; - - dw[4] = 0; - dw[5] = 0; -} - -static void -view_init_for_image_gen6(const struct ilo_dev *dev, - const struct ilo_image *img, - enum pipe_format format, - unsigned first_level, - unsigned num_levels, - unsigned first_layer, - unsigned num_layers, - bool is_rt, - struct ilo_view_surface *surf) -{ - int surface_type, surface_format; - int width, height, depth, pitch, lod; - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 6, 6); - - surface_type = ilo_gpe_gen6_translate_texture(img->target); - assert(surface_type != GEN6_SURFTYPE_BUFFER); - - if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil) - format = PIPE_FORMAT_Z32_FLOAT; - - if 
(is_rt) - surface_format = ilo_format_translate_render(dev, format); - else - surface_format = ilo_format_translate_texture(dev, format); - assert(surface_format >= 0); - - width = img->width0; - height = img->height0; - depth = (img->target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers; - pitch = img->bo_stride; - - if (surface_type == GEN6_SURFTYPE_CUBE) { - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 81: - * - * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the - * range of this field (Depth) is [0,84], indicating the number of - * cube array elements (equal to the number of underlying 2D array - * elements divided by 6). For other surfaces, this field must be - * zero." - * - * When is_rt is true, we treat the texture as a 2D one to avoid the - * restriction. - */ - if (is_rt) { - surface_type = GEN6_SURFTYPE_2D; - } - else { - assert(num_layers % 6 == 0); - depth = num_layers / 6; - } - } - - /* sanity check the size */ - assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); - switch (surface_type) { - case GEN6_SURFTYPE_1D: - assert(width <= 8192 && height == 1 && depth <= 512); - assert(first_layer < 512 && num_layers <= 512); - break; - case GEN6_SURFTYPE_2D: - assert(width <= 8192 && height <= 8192 && depth <= 512); - assert(first_layer < 512 && num_layers <= 512); - break; - case GEN6_SURFTYPE_3D: - assert(width <= 2048 && height <= 2048 && depth <= 2048); - assert(first_layer < 2048 && num_layers <= 512); - if (!is_rt) - assert(first_layer == 0); - break; - case GEN6_SURFTYPE_CUBE: - assert(width <= 8192 && height <= 8192 && depth <= 85); - assert(width == height); - assert(first_layer < 512 && num_layers <= 512); - if (is_rt) - assert(first_layer == 0); - break; - default: - assert(!"unexpected surface type"); - break; - } - - /* non-full array spacing is supported only on GEN7+ */ - assert(img->walk != ILO_IMAGE_WALK_LOD); - /* non-interleaved samples are supported only on GEN7+ */ - if (img->sample_count > 1) - 
assert(img->interleaved_samples); - - if (is_rt) { - assert(num_levels == 1); - lod = first_level; - } - else { - lod = num_levels - 1; - } - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 76: - * - * "Linear render target surface base addresses must be element-size - * aligned, for non-YUV surface formats, or a multiple of 2 - * element-sizes for YUV surface formats. Other linear surfaces have - * no alignment requirements (byte alignment is sufficient.)" - * - * From the Sandy Bridge PRM, volume 4 part 1, page 81: - * - * "For linear render target surfaces, the pitch must be a multiple - * of the element size for non-YUV surface formats. Pitch must be a - * multiple of 2 * element size for YUV surface formats." - * - * From the Sandy Bridge PRM, volume 4 part 1, page 86: - * - * "For linear surfaces, this field (X Offset) must be zero" - */ - if (img->tiling == GEN6_TILING_NONE) { - if (is_rt) { - const int elem_size = util_format_get_blocksize(format); - assert(pitch % elem_size == 0); - } - } - - STATIC_ASSERT(Elements(surf->payload) >= 6); - dw = surf->payload; - - dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT | - GEN6_SURFACE_DW0_MIPLAYOUT_BELOW; - - if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) { - dw[0] |= 1 << 9 | - GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; - } - - dw[1] = 0; - - dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | - (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | - lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; - - assert(img->tiling != GEN8_TILING_W); - dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | - (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT | - img->tiling; - - dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT | - first_layer << 17 | - (num_layers - 1) << 8 | - ((img->sample_count > 1) ? 
GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 : - GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1); - - dw[5] = 0; - - assert(img->align_j == 2 || img->align_j == 4); - if (img->align_j == 4) - dw[5] |= GEN6_SURFACE_DW5_VALIGN_4; -} - -static void -view_init_null_gen7(const struct ilo_dev *dev, - unsigned width, unsigned height, - unsigned depth, unsigned level, - struct ilo_view_surface *surf) -{ - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 7, 8); - - assert(width >= 1 && height >= 1 && depth >= 1); - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 62: - * - * "A null surface is used in instances where an actual surface is not - * bound. When a write message is generated to a null surface, no - * actual surface is written to. When a read message (including any - * sampling engine message) is generated to a null surface, the result - * is all zeros. Note that a null surface type is allowed to be used - * with all messages, even if it is not specificially indicated as - * supported. All of the remaining fields in surface state are ignored - * for null surfaces, with the following exceptions: - * - * * Width, Height, Depth, LOD, and Render Target View Extent fields - * must match the depth buffer's corresponding state for all render - * target surfaces, including null. - * * All sampling engine and data port messages support null surfaces - * with the above behavior, even if not mentioned as specifically - * supported, except for the following: - * * Data Port Media Block Read/Write messages. - * * The Surface Type of a surface used as a render target (accessed - * via the Data Port's Render Target Write message) must be the same - * as the Surface Type of all other render targets and of the depth - * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth - * buffer or render targets are SURFTYPE_NULL." 
- * - * From the Ivy Bridge PRM, volume 4 part 1, page 65: - * - * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be - * true" - */ - - STATIC_ASSERT(Elements(surf->payload) >= 13); - dw = surf->payload; - - dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT | - GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) - dw[0] |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT; - else - dw[0] |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT; - - dw[1] = 0; - - dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) | - GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH); - - dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH); - - dw[4] = 0; - dw[5] = level; - - dw[6] = 0; - dw[7] = 0; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) - memset(&dw[8], 0, sizeof(*dw) * (13 - 8)); -} - -static void -view_init_for_buffer_gen7(const struct ilo_dev *dev, - const struct ilo_buffer *buf, - unsigned offset, unsigned size, - unsigned struct_size, - enum pipe_format elem_format, - bool is_rt, struct ilo_view_surface *surf) -{ - const bool typed = (elem_format != PIPE_FORMAT_NONE); - const bool structured = (!typed && struct_size > 1); - const int elem_size = (typed) ? - util_format_get_blocksize(elem_format) : 1; - int width, height, depth, pitch; - int surface_type, surface_format, num_entries; - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 7, 8); - - surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER; - - surface_format = (typed) ? - ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW; - - num_entries = size / struct_size; - /* see if there is enough space to fit another element */ - if (size % struct_size >= elem_size && !structured) - num_entries++; - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 67: - * - * "For SURFTYPE_BUFFER render targets, this field (Surface Base - * Address) specifies the base address of first element of the - * surface. 
The surface is interpreted as a simple array of that - * single element type. The address must be naturally-aligned to the - * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements - * must be 16-byte aligned) - * - * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies - * the base address of the first element of the surface, computed in - * software by adding the surface base address to the byte offset of - * the element in the buffer." - */ - if (is_rt) - assert(offset % elem_size == 0); - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 68: - * - * "For typed buffer and structured buffer surfaces, the number of - * entries in the buffer ranges from 1 to 2^27. For raw buffer - * surfaces, the number of entries in the buffer is the number of - * bytes which can range from 1 to 2^30." - */ - assert(num_entries >= 1 && - num_entries <= 1 << ((typed || structured) ? 27 : 30)); - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 69: - * - * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be - * 11 if the Surface Format is RAW (the size of the buffer must be a - * multiple of 4 bytes)." - * - * From the Ivy Bridge PRM, volume 4 part 1, page 70: - * - * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this - * field (Surface Pitch) indicates the size of the structure." - * - * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch - * must be a multiple of 4 bytes." 
- */ - if (structured) - assert(struct_size % 4 == 0); - else if (!typed) - assert(num_entries % 4 == 0); - - pitch = struct_size; - - pitch--; - num_entries--; - /* bits [6:0] */ - width = (num_entries & 0x0000007f); - /* bits [20:7] */ - height = (num_entries & 0x001fff80) >> 7; - /* bits [30:21] */ - depth = (num_entries & 0x7fe00000) >> 21; - /* limit to [26:21] */ - if (typed || structured) - depth &= 0x3f; - - STATIC_ASSERT(Elements(surf->payload) >= 13); - dw = surf->payload; - - dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - dw[8] = offset; - memset(&dw[9], 0, sizeof(*dw) * (13 - 9)); - } else { - dw[1] = offset; - } - - dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) | - GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH); - - dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) | - pitch; - - dw[4] = 0; - dw[5] = 0; - - dw[6] = 0; - dw[7] = 0; - - if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { - dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | - GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | - GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | - GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); - } -} - -static void -view_init_for_image_gen7(const struct ilo_dev *dev, - const struct ilo_image *img, - enum pipe_format format, - unsigned first_level, - unsigned num_levels, - unsigned first_layer, - unsigned num_layers, - bool is_rt, - struct ilo_view_surface *surf) -{ - int surface_type, surface_format; - int width, height, depth, pitch, lod; - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 7, 8); - - surface_type = ilo_gpe_gen6_translate_texture(img->target); - assert(surface_type != GEN6_SURFTYPE_BUFFER); - - if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil) - format = PIPE_FORMAT_Z32_FLOAT; - - if (is_rt) - surface_format = ilo_format_translate_render(dev, format); - else - surface_format = 
ilo_format_translate_texture(dev, format); - assert(surface_format >= 0); - - width = img->width0; - height = img->height0; - depth = (img->target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers; - pitch = img->bo_stride; - - if (surface_type == GEN6_SURFTYPE_CUBE) { - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 70: - * - * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of - * this field is [0,340], indicating the number of cube array - * elements (equal to the number of underlying 2D array elements - * divided by 6). For other surfaces, this field must be zero." - * - * When is_rt is true, we treat the texture as a 2D one to avoid the - * restriction. - */ - if (is_rt) { - surface_type = GEN6_SURFTYPE_2D; - } - else { - assert(num_layers % 6 == 0); - depth = num_layers / 6; - } - } - - /* sanity check the size */ - assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); - assert(first_layer < 2048 && num_layers <= 2048); - switch (surface_type) { - case GEN6_SURFTYPE_1D: - assert(width <= 16384 && height == 1 && depth <= 2048); - break; - case GEN6_SURFTYPE_2D: - assert(width <= 16384 && height <= 16384 && depth <= 2048); - break; - case GEN6_SURFTYPE_3D: - assert(width <= 2048 && height <= 2048 && depth <= 2048); - if (!is_rt) - assert(first_layer == 0); - break; - case GEN6_SURFTYPE_CUBE: - assert(width <= 16384 && height <= 16384 && depth <= 86); - assert(width == height); - if (is_rt) - assert(first_layer == 0); - break; - default: - assert(!"unexpected surface type"); - break; - } - - if (is_rt) { - assert(num_levels == 1); - lod = first_level; - } - else { - lod = num_levels - 1; - } - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 68: - * - * "The Base Address for linear render target surfaces and surfaces - * accessed with the typed surface read/write data port messages must - * be element-size aligned, for non-YUV surface formats, or a multiple - * of 2 element-sizes for YUV surface formats. 
Other linear surfaces - * have no alignment requirements (byte alignment is sufficient)." - * - * From the Ivy Bridge PRM, volume 4 part 1, page 70: - * - * "For linear render target surfaces and surfaces accessed with the - * typed data port messages, the pitch must be a multiple of the - * element size for non-YUV surface formats. Pitch must be a multiple - * of 2 * element size for YUV surface formats. For linear surfaces - * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple - * of 4 bytes.For other linear surfaces, the pitch can be any multiple - * of bytes." - * - * From the Ivy Bridge PRM, volume 4 part 1, page 74: - * - * "For linear surfaces, this field (X Offset) must be zero." - */ - if (img->tiling == GEN6_TILING_NONE) { - if (is_rt) { - const int elem_size = util_format_get_blocksize(format); - assert(pitch % elem_size == 0); - } - } - - STATIC_ASSERT(Elements(surf->payload) >= 13); - dw = surf->payload; - - dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT; - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 63: - * - * "If this field (Surface Array) is enabled, the Surface Type must be - * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is - * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or - * SURFTYPE_CUBE, the Depth field must be set to zero." - * - * For non-3D sampler surfaces, resinfo (the sampler message) always - * returns zero for the number of layers when this field is not set. 
- */ - if (surface_type != GEN6_SURFTYPE_3D) { - switch (img->target) { - case PIPE_TEXTURE_1D_ARRAY: - case PIPE_TEXTURE_2D_ARRAY: - case PIPE_TEXTURE_CUBE_ARRAY: - dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY; - break; - default: - assert(depth == 1); - break; - } - } - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - switch (img->align_j) { - case 4: - dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; - break; - case 8: - dw[0] |= GEN8_SURFACE_DW0_VALIGN_8; - break; - case 16: - dw[0] |= GEN8_SURFACE_DW0_VALIGN_16; - break; - default: - assert(!"unsupported valign"); - break; - } - - switch (img->align_i) { - case 4: - dw[0] |= GEN8_SURFACE_DW0_HALIGN_4; - break; - case 8: - dw[0] |= GEN8_SURFACE_DW0_HALIGN_8; - break; - case 16: - dw[0] |= GEN8_SURFACE_DW0_HALIGN_16; - break; - default: - assert(!"unsupported halign"); - break; - } - - dw[0] |= img->tiling << GEN8_SURFACE_DW0_TILING__SHIFT; - } else { - assert(img->align_i == 4 || img->align_i == 8); - assert(img->align_j == 2 || img->align_j == 4); - - if (img->align_j == 4) - dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; - - if (img->align_i == 8) - dw[0] |= GEN7_SURFACE_DW0_HALIGN_8; - - assert(img->tiling != GEN8_TILING_W); - dw[0] |= img->tiling << GEN7_SURFACE_DW0_TILING__SHIFT; - - if (img->walk == ILO_IMAGE_WALK_LOD) - dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0; - else - dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL; - } - - if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) - dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - assert(img->walk_layer_height % 4 == 0); - dw[1] = img->walk_layer_height / 4; - } else { - dw[1] = 0; - } - - dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) | - GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH); - - dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) | - (pitch - 1); - - dw[4] = first_layer << 18 | - (num_layers - 1) << 7; - - /* - * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL - * means the samples are interleaved. 
The layouts are the same when the - * number of samples is 1. - */ - if (img->interleaved_samples && img->sample_count > 1) { - assert(!is_rt); - dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL; - } - else { - dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS; - } - - switch (img->sample_count) { - case 0: - case 1: - default: - dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1; - break; - case 2: - dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2; - break; - case 4: - dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4; - break; - case 8: - dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8; - break; - case 16: - dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16; - break; - } - - dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) | - lod; - - dw[6] = 0; - dw[7] = 0; - - if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { - dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | - GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | - GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | - GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); - } - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) - memset(&dw[8], 0, sizeof(*dw) * (13 - 8)); -} - -void -ilo_gpe_init_view_surface_null(const struct ilo_dev *dev, - unsigned width, unsigned height, - unsigned depth, unsigned level, - struct ilo_view_surface *surf) -{ - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - view_init_null_gen7(dev, - width, height, depth, level, surf); - } else { - view_init_null_gen6(dev, - width, height, depth, level, surf); - } - - surf->bo = NULL; - surf->scanout = false; -} - -void -ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, - const struct ilo_buffer *buf, - unsigned offset, unsigned size, - unsigned struct_size, - enum pipe_format elem_format, - bool is_rt, - struct ilo_view_surface *surf) -{ - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - view_init_for_buffer_gen7(dev, buf, offset, size, - struct_size, elem_format, is_rt, surf); - } else { - view_init_for_buffer_gen6(dev, buf, offset, size, - struct_size, elem_format, is_rt, 
surf); - } - - /* do not increment reference count */ - surf->bo = buf->bo; - surf->scanout = false; -} - -void -ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev, - const struct ilo_image *img, - enum pipe_format format, - unsigned first_level, - unsigned num_levels, - unsigned first_layer, - unsigned num_layers, - bool is_rt, - struct ilo_view_surface *surf) -{ - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - view_init_for_image_gen7(dev, img, format, - first_level, num_levels, first_layer, num_layers, - is_rt, surf); - } else { - view_init_for_image_gen6(dev, img, format, - first_level, num_levels, first_layer, num_layers, - is_rt, surf); - } - - surf->scanout = img->scanout; - /* do not increment reference count */ - surf->bo = img->bo; -} - static void sampler_init_border_color_gen6(const struct ilo_dev *dev, const union pipe_color_union *color, diff --git a/src/gallium/drivers/ilo/ilo_render_dynamic.c b/src/gallium/drivers/ilo/ilo_render_dynamic.c index ef92b12da83..a346ebb3201 100644 --- a/src/gallium/drivers/ilo/ilo_render_dynamic.c +++ b/src/gallium/drivers/ilo/ilo_render_dynamic.c @@ -234,13 +234,13 @@ gen6_emit_draw_dynamic_pcb(struct ilo_render *r, const struct ilo_cbuf_state *cbuf = &vec->cbuf[PIPE_SHADER_VERTEX]; - if (cbuf0_size <= cbuf->cso[0].user_buffer_size) { + if (cbuf0_size <= cbuf->cso[0].info.size) { memcpy(pcb, cbuf->cso[0].user_buffer, cbuf0_size); } else { memcpy(pcb, cbuf->cso[0].user_buffer, - cbuf->cso[0].user_buffer_size); - memset(pcb + cbuf->cso[0].user_buffer_size, 0, - cbuf0_size - cbuf->cso[0].user_buffer_size); + cbuf->cso[0].info.size); + memset(pcb + cbuf->cso[0].info.size, 0, + cbuf0_size - cbuf->cso[0].info.size); } pcb += cbuf0_size; @@ -271,13 +271,13 @@ gen6_emit_draw_dynamic_pcb(struct ilo_render *r, gen6_push_constant_buffer(r->builder, cbuf0_size, &pcb); r->state.wm.PUSH_CONSTANT_BUFFER_size = cbuf0_size; - if (cbuf0_size <= cbuf->cso[0].user_buffer_size) { + if (cbuf0_size <= cbuf->cso[0].info.size) { memcpy(pcb, 
cbuf->cso[0].user_buffer, cbuf0_size); } else { memcpy(pcb, cbuf->cso[0].user_buffer, - cbuf->cso[0].user_buffer_size); - memset(pcb + cbuf->cso[0].user_buffer_size, 0, - cbuf0_size - cbuf->cso[0].user_buffer_size); + cbuf->cso[0].info.size); + memset(pcb + cbuf->cso[0].info.size, 0, + cbuf0_size - cbuf->cso[0].info.size); } session->pcb_fs_changed = true; diff --git a/src/gallium/drivers/ilo/ilo_render_surface.c b/src/gallium/drivers/ilo/ilo_render_surface.c index 52b1cb42c06..729cb604eaf 100644 --- a/src/gallium/drivers/ilo/ilo_render_surface.c +++ b/src/gallium/drivers/ilo/ilo_render_surface.c @@ -64,11 +64,9 @@ gen6_emit_draw_surface_rt(struct ilo_render *r, (const struct ilo_surface_cso *) fb->state.cbufs[i]; assert(surface->is_rt); - surface_state[i] = - gen6_SURFACE_STATE(r->builder, &surface->u.rt, true); + surface_state[i] = gen6_SURFACE_STATE(r->builder, &surface->u.rt); } else { - surface_state[i] = - gen6_SURFACE_STATE(r->builder, &fb->null_rt, true); + surface_state[i] = gen6_SURFACE_STATE(r->builder, &fb->null_rt); } } } @@ -173,8 +171,7 @@ gen6_emit_draw_surface_view(struct ilo_render *r, const struct ilo_view_cso *cso = (const struct ilo_view_cso *) view->states[i]; - surface_state[i] = - gen6_SURFACE_STATE(r->builder, &cso->surface, false); + surface_state[i] = gen6_SURFACE_STATE(r->builder, &cso->surface); } else { surface_state[i] = 0; } @@ -228,12 +225,10 @@ gen6_emit_draw_surface_const(struct ilo_render *r, for (i = 0; i < count; i++) { const struct ilo_cbuf_cso *cso = &cbuf->cso[i]; - if (cso->resource) { - surface_state[i] = gen6_SURFACE_STATE(r->builder, - &cso->surface, false); - } else { + if (cso->resource) + surface_state[i] = gen6_SURFACE_STATE(r->builder, &cso->surface); + else surface_state[i] = 0; - } } } @@ -406,8 +401,7 @@ gen6_emit_launch_grid_surface_view(struct ilo_render *r, const struct ilo_view_cso *cso = (const struct ilo_view_cso *) view->states[i]; - surface_state[i] = - gen6_SURFACE_STATE(r->builder, &cso->surface, 
false); + surface_state[i] = gen6_SURFACE_STATE(r->builder, &cso->surface); } else { surface_state[i] = 0; } @@ -421,7 +415,8 @@ gen6_emit_launch_grid_surface_const(struct ilo_render *r, { const struct ilo_shader_state *cs = vec->cs; uint32_t *surface_state = r->state.cs.SURFACE_STATE; - struct ilo_view_surface view; + struct ilo_state_surface_buffer_info info; + struct ilo_state_surface surf; int base, count; ILO_DEV_ASSERT(r->dev, 7, 7.5); @@ -432,15 +427,22 @@ gen6_emit_launch_grid_surface_const(struct ilo_render *r, if (!count) return; - ilo_gpe_init_view_surface_for_buffer(r->dev, - ilo_buffer(session->input->buffer), - session->input->buffer_offset, - session->input->buffer_size, - 1, PIPE_FORMAT_NONE, - false, &view); + memset(&info, 0, sizeof(info)); + info.buf = ilo_buffer(session->input->buffer); + info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED; + info.format = GEN6_FORMAT_RAW; + info.format_size = 1; + info.struct_size = 1; + info.readonly = true; + info.offset = session->input->buffer_offset; + info.size = session->input->buffer_size; + + memset(&surf, 0, sizeof(surf)); + ilo_state_surface_init_for_buffer(&surf, r->dev, &info); + surf.bo = info.buf->bo; assert(count == 1 && session->input->buffer); - surface_state[base] = gen6_SURFACE_STATE(r->builder, &view, false); + surface_state[base] = gen6_SURFACE_STATE(r->builder, &surf); } static void @@ -483,14 +485,24 @@ gen6_emit_launch_grid_surface_global(struct ilo_render *r, for (i = 0; i < count; i++) { if (i < vec->global_binding.count && bindings[i].resource) { const struct ilo_buffer *buf = ilo_buffer(bindings[i].resource); - struct ilo_view_surface view; + struct ilo_state_surface_buffer_info info; + struct ilo_state_surface surf; assert(bindings[i].resource->target == PIPE_BUFFER); - ilo_gpe_init_view_surface_for_buffer(r->dev, buf, 0, buf->bo_size, - 1, PIPE_FORMAT_NONE, true, &view); - surface_state[i] = - gen6_SURFACE_STATE(r->builder, &view, true); + memset(&info, 0, sizeof(info)); + info.buf 
= buf; + info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED; + info.format = GEN6_FORMAT_RAW; + info.format_size = 1; + info.struct_size = 1; + info.size = buf->bo_size; + + memset(&surf, 0, sizeof(surf)); + ilo_state_surface_init_for_buffer(&surf, r->dev, &info); + surf.bo = info.buf->bo; + + surface_state[i] = gen6_SURFACE_STATE(r->builder, &surf); } else { surface_state[i] = 0; } diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 80e01c7bd1d..b75a2590b2b 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -345,7 +345,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: return true; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: - return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 2048 : 512; + return (ilo_dev_gen(&is->dev) >= ILO_GEN(7.5)) ? 2048 : 512; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 4e298254ce8..060f8ae4453 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -25,9 +25,11 @@ * Chia-I Wu */ +#include "core/ilo_format.h" #include "core/ilo_state_3d.h" #include "util/u_dynarray.h" #include "util/u_helpers.h" +#include "util/u_resource.h" #include "util/u_upload_mgr.h" #include "ilo_context.h" @@ -97,7 +99,6 @@ finalize_cbuf_state(struct ilo_context *ilo, ~ilo_shader_get_kernel_param(sh, ILO_KERNEL_SKIP_CBUF0_UPLOAD); while (upload_mask) { - const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; unsigned offset, i; i = u_bit_scan(&upload_mask); @@ -105,14 +106,16 @@ finalize_cbuf_state(struct ilo_context *ilo, if (cbuf->cso[i].resource) continue; - u_upload_data(ilo->uploader, 0, cbuf->cso[i].user_buffer_size, + u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size, 
cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource); - ilo_gpe_init_view_surface_for_buffer(ilo->dev, - ilo_buffer(cbuf->cso[i].resource), - offset, cbuf->cso[i].user_buffer_size, - util_format_get_blocksize(elem_format), elem_format, - false, &cbuf->cso[i].surface); + cbuf->cso[i].info.buf = ilo_buffer(cbuf->cso[i].resource); + cbuf->cso[i].info.offset = offset; + + memset(&cbuf->cso[i].surface, 0, sizeof(cbuf->cso[i].surface)); + ilo_state_surface_init_for_buffer(&cbuf->cso[i].surface, + ilo->dev, &cbuf->cso[i].info); + cbuf->cso[i].surface.bo = cbuf->cso[i].info.buf->bo; ilo->state_vector.dirty |= ILO_DIRTY_CBUF; } @@ -675,47 +678,47 @@ ilo_set_constant_buffer(struct pipe_context *pipe, pipe_resource_reference(&cso->resource, buf[i].buffer); - if (buf[i].buffer) { - const enum pipe_format elem_format = - PIPE_FORMAT_R32G32B32A32_FLOAT; + cso->info.access = ILO_STATE_SURFACE_ACCESS_DP_DATA; + cso->info.format = GEN6_FORMAT_R32G32B32A32_FLOAT; + cso->info.format_size = 16; + cso->info.struct_size = 16; + cso->info.readonly = true; + cso->info.size = buf[i].buffer_size; - ilo_gpe_init_view_surface_for_buffer(dev, - ilo_buffer(buf[i].buffer), - buf[i].buffer_offset, buf[i].buffer_size, - util_format_get_blocksize(elem_format), elem_format, - false, &cso->surface); + if (buf[i].buffer) { + cso->info.buf = ilo_buffer(buf[i].buffer); + cso->info.offset = buf[i].buffer_offset; + + memset(&cso->surface, 0, sizeof(cso->surface)); + ilo_state_surface_init_for_buffer(&cso->surface, dev, &cso->info); + cso->surface.bo = cso->info.buf->bo; cso->user_buffer = NULL; - cso->user_buffer_size = 0; cbuf->enabled_mask |= 1 << (index + i); - } - else if (buf[i].user_buffer) { - cso->surface.bo = NULL; - + } else if (buf[i].user_buffer) { + cso->info.buf = NULL; /* buffer_offset does not apply for user buffer */ cso->user_buffer = buf[i].user_buffer; - cso->user_buffer_size = buf[i].buffer_size; cbuf->enabled_mask |= 1 << (index + i); - } - else { - cso->surface.bo = NULL; + } 
else { + cso->info.buf = NULL; + cso->info.size = 0; cso->user_buffer = NULL; - cso->user_buffer_size = 0; cbuf->enabled_mask &= ~(1 << (index + i)); } } - } - else { + } else { for (i = 0; i < count; i++) { struct ilo_cbuf_cso *cso = &cbuf->cso[index + i]; pipe_resource_reference(&cso->resource, NULL); - cso->surface.bo = NULL; + + cso->info.buf = NULL; + cso->info.size = 0; cso->user_buffer = NULL; - cso->user_buffer_size = 0; cbuf->enabled_mask &= ~(1 << (index + i)); } @@ -991,7 +994,7 @@ ilo_create_sampler_view(struct pipe_context *pipe, const struct ilo_dev *dev = ilo_context(pipe)->dev; struct ilo_view_cso *view; - view = MALLOC_STRUCT(ilo_view_cso); + view = CALLOC_STRUCT(ilo_view_cso); assert(view); view->base = *templ; @@ -1001,16 +1004,24 @@ ilo_create_sampler_view(struct pipe_context *pipe, view->base.context = pipe; if (res->target == PIPE_BUFFER) { - const unsigned elem_size = util_format_get_blocksize(templ->format); - const unsigned first_elem = templ->u.buf.first_element; - const unsigned num_elems = templ->u.buf.last_element - first_elem + 1; + struct ilo_state_surface_buffer_info info; - ilo_gpe_init_view_surface_for_buffer(dev, ilo_buffer(res), - first_elem * elem_size, num_elems * elem_size, - elem_size, templ->format, false, &view->surface); - } - else { + memset(&info, 0, sizeof(info)); + info.buf = ilo_buffer(res); + info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER; + info.format = ilo_format_translate_color(dev, templ->format); + info.format_size = util_format_get_blocksize(templ->format); + info.struct_size = info.format_size; + info.readonly = true; + info.offset = templ->u.buf.first_element * info.struct_size; + info.size = (templ->u.buf.last_element - + templ->u.buf.first_element + 1) * info.struct_size; + + ilo_state_surface_init_for_buffer(&view->surface, dev, &info); + view->surface.bo = info.buf->bo; + } else { struct ilo_texture *tex = ilo_texture(res); + struct ilo_state_surface_image_info info; /* warn about degraded performance 
because of a missing binding flag */ if (tex->image.tiling == GEN6_TILING_NONE && @@ -1019,12 +1030,33 @@ ilo_create_sampler_view(struct pipe_context *pipe, "not created for sampling\n"); } - ilo_gpe_init_view_surface_for_image(dev, &tex->image, templ->format, - templ->u.tex.first_level, - templ->u.tex.last_level - templ->u.tex.first_level + 1, - templ->u.tex.first_layer, - templ->u.tex.last_layer - templ->u.tex.first_layer + 1, - false, &view->surface); + memset(&info, 0, sizeof(info)); + info.img = &tex->image; + + info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER; + + if (templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && + tex->image.separate_stencil) { + info.format = ilo_format_translate_texture(dev, + PIPE_FORMAT_Z32_FLOAT); + } else { + info.format = ilo_format_translate_texture(dev, templ->format); + } + + info.is_cube_map = (tex->image.target == PIPE_TEXTURE_CUBE || + tex->image.target == PIPE_TEXTURE_CUBE_ARRAY); + info.is_array = util_resource_is_array_texture(&tex->base); + info.readonly = true; + + info.level_base = templ->u.tex.first_level; + info.level_count = templ->u.tex.last_level - + templ->u.tex.first_level + 1; + info.slice_base = templ->u.tex.first_layer; + info.slice_count = templ->u.tex.last_layer - + templ->u.tex.first_layer + 1; + + ilo_state_surface_init_for_image(&view->surface, dev, &info); + view->surface.bo = info.img->bo; } return &view->base; @@ -1062,14 +1094,24 @@ ilo_create_surface(struct pipe_context *pipe, surf->is_rt = !util_format_is_depth_or_stencil(templ->format); if (surf->is_rt) { + struct ilo_state_surface_image_info info; + /* relax this? 
*/ assert(tex->base.target != PIPE_BUFFER); - ilo_gpe_init_view_surface_for_image(dev, &tex->image, - templ->format, templ->u.tex.level, 1, - templ->u.tex.first_layer, - templ->u.tex.last_layer - templ->u.tex.first_layer + 1, - true, &surf->u.rt); + memset(&info, 0, sizeof(info)); + info.img = &tex->image; + info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER; + info.format = ilo_format_translate_render(dev, templ->format); + info.is_array = util_resource_is_array_texture(&tex->base); + info.level_base = templ->u.tex.level; + info.level_count = 1; + info.slice_base = templ->u.tex.first_layer; + info.slice_count = templ->u.tex.last_layer - + templ->u.tex.first_layer + 1; + + ilo_state_surface_init_for_image(&surf->u.rt, dev, &info); + surf->u.rt.bo = info.img->bo; } else { struct ilo_state_zs_info info; @@ -1313,6 +1355,7 @@ ilo_state_vector_init(const struct ilo_dev *dev, { ilo_gpe_set_scissor_null(dev, &vec->scissor); + ilo_state_surface_init_for_null(&vec->fb.null_rt, dev); ilo_state_zs_init_for_null(&vec->fb.null_zs, dev); util_dynarray_init(&vec->global_binding.bindings); diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index fd0a3156ebc..9a7d196a2a6 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -29,6 +29,8 @@ #define ILO_STATE_H #include "core/ilo_state_3d.h" +#include "core/ilo_state_surface.h" +#include "core/ilo_state_zs.h" #include "pipe/p_state.h" #include "util/u_dynarray.h" @@ -121,6 +123,39 @@ enum ilo_dirty_flags { struct ilo_context; +struct ilo_cbuf_cso { + struct pipe_resource *resource; + struct ilo_state_surface_buffer_info info; + struct ilo_state_surface surface; + + /* + * this CSO is not so constant because user buffer needs to be uploaded in + * finalize_constant_buffers() + */ + const void *user_buffer; +}; + +struct ilo_cbuf_state { + struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS]; + uint32_t enabled_mask; +}; + +struct ilo_resource_state { + struct 
pipe_surface *states[PIPE_MAX_SHADER_RESOURCES]; + unsigned count; +}; + +struct ilo_view_cso { + struct pipe_sampler_view base; + + struct ilo_state_surface surface; +}; + +struct ilo_view_state { + struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS]; + unsigned count; +}; + struct ilo_global_binding_cso { struct pipe_resource *resource; uint32_t *handle; From 4b5c0a83415137ba1f894d70a6cf73db83d21f15 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 21 May 2015 17:18:37 +0800 Subject: [PATCH 622/834] ilo: replace ilo_sampler_cso with ilo_state_sampler --- .../drivers/ilo/core/ilo_builder_3d_top.h | 106 +--- src/gallium/drivers/ilo/core/ilo_state_3d.h | 25 - .../drivers/ilo/core/ilo_state_3d_top.c | 459 ------------------ src/gallium/drivers/ilo/ilo_render_dynamic.c | 60 ++- src/gallium/drivers/ilo/ilo_state.c | 183 ++++++- src/gallium/drivers/ilo/ilo_state.h | 15 + 6 files changed, 258 insertions(+), 590 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index cec1043b204..d5a4c778a87 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -36,6 +36,7 @@ #include "ilo_core.h" #include "ilo_dev.h" #include "ilo_state_3d.h" +#include "ilo_state_sampler.h" #include "ilo_builder.h" static inline void @@ -1728,13 +1729,12 @@ gen6_so_SURFACE_STATE(struct ilo_builder *builder, static inline uint32_t gen6_SAMPLER_STATE(struct ilo_builder *builder, - const struct ilo_sampler_cso * const *samplers, - const struct pipe_sampler_view * const *views, + const struct ilo_state_sampler *samplers, const uint32_t *sampler_border_colors, - int num_samplers) + int sampler_count) { const int state_align = 32; - const int state_len = 4 * num_samplers; + const int state_len = 4 * sampler_count; uint32_t state_offset, *dw; int i; @@ -1745,9 +1745,9 @@ gen6_SAMPLER_STATE(struct ilo_builder *builder, * * "The sampler state is stored as an array 
of up to 16 elements..." */ - assert(num_samplers <= 16); + assert(sampler_count <= 16); - if (!num_samplers) + if (!sampler_count) return 0; /* @@ -1759,86 +1759,19 @@ gen6_SAMPLER_STATE(struct ilo_builder *builder, * * It also applies to other shader stages. */ - ilo_builder_dynamic_pad_top(builder, 4 * (4 - (num_samplers % 4))); + ilo_builder_dynamic_pad_top(builder, 4 * (4 - (sampler_count % 4))); state_offset = ilo_builder_dynamic_pointer(builder, ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw); - for (i = 0; i < num_samplers; i++) { - const struct ilo_sampler_cso *sampler = samplers[i]; - const struct pipe_sampler_view *view = views[i]; - const uint32_t border_color = sampler_border_colors[i]; - uint32_t dw_filter, dw_wrap; + for (i = 0; i < sampler_count; i++) { + /* see sampler_set_gen6_SAMPLER_STATE() */ + dw[0] = samplers[i].sampler[0]; + dw[1] = samplers[i].sampler[1]; + dw[3] = samplers[i].sampler[2]; - /* there may be holes */ - if (!sampler || !view) { - /* disabled sampler */ - dw[0] = 1 << 31; - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw += 4; - - continue; - } - - /* determine filter and wrap modes */ - switch (view->texture->target) { - case PIPE_TEXTURE_1D: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap_1d; - break; - case PIPE_TEXTURE_3D: - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 103: - * - * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for - * surfaces of type SURFTYPE_3D." - */ - dw_filter = sampler->dw_filter; - dw_wrap = sampler->dw_wrap; - break; - case PIPE_TEXTURE_CUBE: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap_cube; - break; - default: - dw_filter = (sampler->anisotropic) ? 
- sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap; - break; - } - - dw[0] = sampler->payload[0]; - dw[1] = sampler->payload[1]; - assert(!(border_color & 0x1f)); - dw[2] = border_color; - dw[3] = sampler->payload[2]; - - dw[0] |= dw_filter; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { - dw[3] |= dw_wrap; - } - else { - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 21: - * - * "[DevSNB] Errata: Incorrect behavior is observed in cases - * where the min and mag mode filters are different and - * SurfMinLOD is nonzero. The determination of MagMode uses the - * following equation instead of the one in the above - * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" - * - * As a way to work around that, we set Base to - * view->u.tex.first_level. - */ - dw[0] |= view->u.tex.first_level << 22; - - dw[1] |= dw_wrap; - } + assert(!(sampler_border_colors[i] & 0x1f)); + dw[2] = sampler_border_colors[i]; dw += 4; } @@ -1848,7 +1781,7 @@ gen6_SAMPLER_STATE(struct ilo_builder *builder, static inline uint32_t gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder, - const struct ilo_sampler_cso *sampler) + const struct ilo_state_sampler_border *border) { const int state_align = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 
64 : 32; @@ -1856,11 +1789,12 @@ gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder, ILO_DEV_ASSERT(builder->dev, 6, 8); - assert(Elements(sampler->payload) >= 3 + state_len); - - /* see ilo_gpe_init_sampler_cso() */ + /* + * see border_set_gen6_SAMPLER_BORDER_COLOR_STATE() and + * border_set_gen7_SAMPLER_BORDER_COLOR_STATE() + */ return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLOB, - state_align, state_len, &sampler->payload[3]); + state_align, state_len, border->color); } static inline uint32_t diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index 6518c442249..260f94bf766 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -195,26 +195,6 @@ struct ilo_blend_state { uint32_t dw_ps_blend_dst_alpha_forced_one; }; -struct ilo_sampler_cso { - /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */ - uint32_t payload[15]; - - uint32_t dw_filter; - uint32_t dw_filter_aniso; - uint32_t dw_wrap; - uint32_t dw_wrap_1d; - uint32_t dw_wrap_cube; - - bool anisotropic; - bool saturate_r; - bool saturate_s; - bool saturate_t; -}; - -struct ilo_sampler_state { - const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS]; -}; - struct ilo_surface_cso { struct pipe_surface base; @@ -290,11 +270,6 @@ ilo_gpe_init_blend(const struct ilo_dev *dev, const struct pipe_blend_state *state, struct ilo_blend_state *blend); -void -ilo_gpe_init_sampler_cso(const struct ilo_dev *dev, - const struct pipe_sampler_state *state, - struct ilo_sampler_cso *sampler); - void ilo_gpe_init_vs_cso(const struct ilo_dev *dev, const struct ilo_shader_state *vs, diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c index 8f3133e1345..ca58f73783e 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c @@ -408,462 +408,3 @@ ilo_gpe_init_gs_cso(const struct ilo_dev *dev, else 
gs_init_cso_gen6(dev, gs, cso); } - -static void -sampler_init_border_color_gen6(const struct ilo_dev *dev, - const union pipe_color_union *color, - uint32_t *dw, int num_dwords) -{ - float rgba[4] = { - color->f[0], color->f[1], color->f[2], color->f[3], - }; - - ILO_DEV_ASSERT(dev, 6, 6); - - assert(num_dwords >= 12); - - /* - * This state is not documented in the Sandy Bridge PRM, but in the - * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1. - */ - - /* IEEE_FP */ - dw[1] = fui(rgba[0]); - dw[2] = fui(rgba[1]); - dw[3] = fui(rgba[2]); - dw[4] = fui(rgba[3]); - - /* FLOAT_16 */ - dw[5] = util_float_to_half(rgba[0]) | - util_float_to_half(rgba[1]) << 16; - dw[6] = util_float_to_half(rgba[2]) | - util_float_to_half(rgba[3]) << 16; - - /* clamp to [-1.0f, 1.0f] */ - rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f); - rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f); - rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f); - rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f); - - /* SNORM16 */ - dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) | - (int16_t) util_iround(rgba[1] * 32767.0f) << 16; - dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) | - (int16_t) util_iround(rgba[3] * 32767.0f) << 16; - - /* SNORM8 */ - dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) | - (int8_t) util_iround(rgba[1] * 127.0f) << 8 | - (int8_t) util_iround(rgba[2] * 127.0f) << 16 | - (int8_t) util_iround(rgba[3] * 127.0f) << 24; - - /* clamp to [0.0f, 1.0f] */ - rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f); - rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f); - rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f); - rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f); - - /* UNORM8 */ - dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) | - (uint8_t) util_iround(rgba[1] * 255.0f) << 8 | - (uint8_t) util_iround(rgba[2] * 255.0f) << 16 | - (uint8_t) util_iround(rgba[3] * 255.0f) << 24; - - /* UNORM16 */ - dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) | - (uint16_t) util_iround(rgba[1] * 65535.0f) << 16; - dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) | - (uint16_t) 
util_iround(rgba[3] * 65535.0f) << 16; -} - -/** - * Translate a pipe texture mipfilter to the matching hardware mipfilter. - */ -static int -gen6_translate_tex_mipfilter(unsigned filter) -{ - switch (filter) { - case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST; - case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR; - case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE; - default: - assert(!"unknown mipfilter"); - return GEN6_MIPFILTER_NONE; - } -} - -/** - * Translate a pipe texture filter to the matching hardware mapfilter. - */ -static int -gen6_translate_tex_filter(unsigned filter) -{ - switch (filter) { - case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST; - case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR; - default: - assert(!"unknown sampler filter"); - return GEN6_MAPFILTER_NEAREST; - } -} - -/** - * Translate a pipe texture coordinate wrapping mode to the matching hardware - * wrapping mode. - */ -static int -gen6_translate_tex_wrap(unsigned wrap) -{ - switch (wrap) { - case PIPE_TEX_WRAP_CLAMP: return GEN8_TEXCOORDMODE_HALF_BORDER; - case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER; - case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - default: - assert(!"unknown sampler wrap mode"); - return GEN6_TEXCOORDMODE_WRAP; - } -} - -/** - * Translate a pipe shadow compare function to the matching hardware shadow - * function. - */ -static int -gen6_translate_shadow_func(unsigned func) -{ - /* - * For PIPE_FUNC_x, the reference value is on the left-hand side of the - * comparison, and 1.0 is returned when the comparison is true. 
- * - * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of - * the comparison, and 0.0 is returned when the comparison is true. - */ - switch (func) { - case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_ALWAYS; - case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LEQUAL; - case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; - case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LESS; - case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GEQUAL; - case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_EQUAL; - case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GREATER; - case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_NEVER; - default: - assert(!"unknown shadow compare function"); - return GEN6_COMPAREFUNCTION_NEVER; - } -} - -void -ilo_gpe_init_sampler_cso(const struct ilo_dev *dev, - const struct pipe_sampler_state *state, - struct ilo_sampler_cso *sampler) -{ - int mip_filter, min_filter, mag_filter, max_aniso; - int lod_bias, max_lod, min_lod; - int wrap_s, wrap_t, wrap_r, wrap_cube; - uint32_t dw0, dw1, dw3; - - ILO_DEV_ASSERT(dev, 6, 8); - - memset(sampler, 0, sizeof(*sampler)); - - mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter); - min_filter = gen6_translate_tex_filter(state->min_img_filter); - mag_filter = gen6_translate_tex_filter(state->mag_img_filter); - - sampler->anisotropic = state->max_anisotropy; - - if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16) - max_aniso = state->max_anisotropy / 2 - 1; - else if (state->max_anisotropy > 16) - max_aniso = GEN6_ANISORATIO_16; - else - max_aniso = GEN6_ANISORATIO_2; - - /* - * - * Here is how the hardware calculate per-pixel LOD, from my reading of the - * PRMs: - * - * 1) LOD is set to log2(ratio of texels to pixels) if not specified in - * other ways. The number of texels is measured using level - * SurfMinLod. - * 2) Bias is added to LOD. 
- * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is - * compared with Base to determine whether magnification or - * minification is needed. (if preclamp is disabled, LOD is compared - * with Base before clamping) - * 4) If magnification is needed, or no mipmapping is requested, LOD is - * set to floor(MinLod). - * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD. - * - * With Gallium interface, Base is always zero and - * pipe_sampler_view::u.tex.first_level specifies SurfMinLod. - */ - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - const float scale = 256.0f; - - /* [-16.0, 16.0) in S4.8 */ - lod_bias = (int) - (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); - lod_bias &= 0x1fff; - - /* [0.0, 14.0] in U4.8 */ - max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale); - min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale); - } - else { - const float scale = 64.0f; - - /* [-16.0, 16.0) in S4.6 */ - lod_bias = (int) - (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); - lod_bias &= 0x7ff; - - /* [0.0, 13.0] in U4.6 */ - max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale); - min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale); - } - - /* - * We want LOD to be clamped to determine magnification/minification, and - * get set to zero when it is magnification or when mipmapping is disabled. - * The hardware would set LOD to floor(MinLod) and that is a problem when - * MinLod is greater than or equal to 1.0f. - * - * With Base being zero, it is always minification when MinLod is non-zero. - * To achieve our goal, we just need to set MinLod to zero and set - * MagFilter to MinFilter when mipmapping is disabled. 
- */ - if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) { - min_lod = 0; - mag_filter = min_filter; - } - - /* determine wrap s/t/r */ - wrap_s = gen6_translate_tex_wrap(state->wrap_s); - wrap_t = gen6_translate_tex_wrap(state->wrap_t); - wrap_r = gen6_translate_tex_wrap(state->wrap_r); - if (ilo_dev_gen(dev) < ILO_GEN(8)) { - /* - * For nearest filtering, PIPE_TEX_WRAP_CLAMP means - * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, - * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while - * additionally clamping the texture coordinates to [0.0, 1.0]. - * - * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8. The - * clamping has to be taken care of in the shaders. There are two - * filters here, but let the minification one has a say. - */ - const bool clamp_is_to_edge = - (state->min_img_filter == PIPE_TEX_FILTER_NEAREST); - - if (clamp_is_to_edge) { - if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) - wrap_s = GEN6_TEXCOORDMODE_CLAMP; - if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) - wrap_t = GEN6_TEXCOORDMODE_CLAMP; - if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) - wrap_r = GEN6_TEXCOORDMODE_CLAMP; - } else { - if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) { - wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER; - sampler->saturate_s = true; - } - if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) { - wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER; - sampler->saturate_t = true; - } - if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) { - wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER; - sampler->saturate_r = true; - } - } - } - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 107: - * - * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP - * and TEXCOORDMODE_CUBE settings are valid, and each TC component - * must have the same Address Control mode." 
- * - * From the Ivy Bridge PRM, volume 4 part 1, page 96: - * - * "This field (Cube Surface Control Mode) must be set to - * CUBECTRLMODE_PROGRAMMED" - * - * Therefore, we cannot use "Cube Surface Control Mode" for semless cube - * map filtering. - */ - if (state->seamless_cube_map && - (state->min_img_filter != PIPE_TEX_FILTER_NEAREST || - state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) { - wrap_cube = GEN6_TEXCOORDMODE_CUBE; - } - else { - wrap_cube = GEN6_TEXCOORDMODE_CLAMP; - } - - if (!state->normalized_coords) { - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 98: - * - * "The following state must be set as indicated if this field - * (Non-normalized Coordinate Enable) is enabled: - * - * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP, - * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER. - * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D. - * - Mag Mode Filter must be MAPFILTER_NEAREST or - * MAPFILTER_LINEAR. - * - Min Mode Filter must be MAPFILTER_NEAREST or - * MAPFILTER_LINEAR. - * - Mip Mode Filter must be MIPFILTER_NONE. - * - Min LOD must be 0. - * - Max LOD must be 0. - * - MIP Count must be 0. - * - Surface Min LOD must be 0. - * - Texture LOD Bias must be 0." 
- */ - assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP || - wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER); - assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP || - wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER); - assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP || - wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER); - - assert(mag_filter == GEN6_MAPFILTER_NEAREST || - mag_filter == GEN6_MAPFILTER_LINEAR); - assert(min_filter == GEN6_MAPFILTER_NEAREST || - min_filter == GEN6_MAPFILTER_LINEAR); - - /* work around a bug in util_blitter */ - mip_filter = GEN6_MIPFILTER_NONE; - - assert(mip_filter == GEN6_MIPFILTER_NONE); - } - - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - dw0 = 1 << 28 | - mip_filter << 20 | - lod_bias << 1; - - sampler->dw_filter = mag_filter << 17 | - min_filter << 14; - - sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | - GEN6_MAPFILTER_ANISOTROPIC << 14 | - 1; - - dw1 = min_lod << 20 | - max_lod << 8; - - if (state->compare_mode != PIPE_TEX_COMPARE_NONE) - dw1 |= gen6_translate_shadow_func(state->compare_func) << 1; - - dw3 = max_aniso << 19; - - /* round the coordinates for linear filtering */ - if (min_filter != GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | - GEN6_SAMPLER_DW3_V_MIN_ROUND | - GEN6_SAMPLER_DW3_R_MIN_ROUND); - } - if (mag_filter != GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | - GEN6_SAMPLER_DW3_V_MAG_ROUND | - GEN6_SAMPLER_DW3_R_MAG_ROUND); - } - - if (!state->normalized_coords) - dw3 |= 1 << 10; - - sampler->dw_wrap = wrap_s << 6 | - wrap_t << 3 | - wrap_r; - - /* - * As noted in the classic i965 driver, the HW may still reference - * wrap_t and wrap_r for 1D textures. 
We need to set them to a safe - * mode - */ - sampler->dw_wrap_1d = wrap_s << 6 | - GEN6_TEXCOORDMODE_WRAP << 3 | - GEN6_TEXCOORDMODE_WRAP; - - sampler->dw_wrap_cube = wrap_cube << 6 | - wrap_cube << 3 | - wrap_cube; - - STATIC_ASSERT(Elements(sampler->payload) >= 7); - - sampler->payload[0] = dw0; - sampler->payload[1] = dw1; - sampler->payload[2] = dw3; - - memcpy(&sampler->payload[3], - state->border_color.ui, sizeof(state->border_color.ui)); - } - else { - dw0 = 1 << 28 | - mip_filter << 20 | - lod_bias << 3; - - if (state->compare_mode != PIPE_TEX_COMPARE_NONE) - dw0 |= gen6_translate_shadow_func(state->compare_func); - - sampler->dw_filter = (min_filter != mag_filter) << 27 | - mag_filter << 17 | - min_filter << 14; - - sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | - GEN6_MAPFILTER_ANISOTROPIC << 14; - - dw1 = min_lod << 22 | - max_lod << 12; - - sampler->dw_wrap = wrap_s << 6 | - wrap_t << 3 | - wrap_r; - - sampler->dw_wrap_1d = wrap_s << 6 | - GEN6_TEXCOORDMODE_WRAP << 3 | - GEN6_TEXCOORDMODE_WRAP; - - sampler->dw_wrap_cube = wrap_cube << 6 | - wrap_cube << 3 | - wrap_cube; - - dw3 = max_aniso << 19; - - /* round the coordinates for linear filtering */ - if (min_filter != GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | - GEN6_SAMPLER_DW3_V_MIN_ROUND | - GEN6_SAMPLER_DW3_R_MIN_ROUND); - } - if (mag_filter != GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | - GEN6_SAMPLER_DW3_V_MAG_ROUND | - GEN6_SAMPLER_DW3_R_MAG_ROUND); - } - - if (!state->normalized_coords) - dw3 |= 1; - - STATIC_ASSERT(Elements(sampler->payload) >= 15); - - sampler->payload[0] = dw0; - sampler->payload[1] = dw1; - sampler->payload[2] = dw3; - - sampler_init_border_color_gen6(dev, - &state->border_color, &sampler->payload[3], 12); - } -} diff --git a/src/gallium/drivers/ilo/ilo_render_dynamic.c b/src/gallium/drivers/ilo/ilo_render_dynamic.c index a346ebb3201..d7822281611 100644 --- a/src/gallium/drivers/ilo/ilo_render_dynamic.c +++ 
b/src/gallium/drivers/ilo/ilo_render_dynamic.c @@ -137,12 +137,11 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r, int shader_type, struct ilo_render_draw_session *session) { - const struct ilo_sampler_cso * const *samplers = - vec->sampler[shader_type].cso; - const struct pipe_sampler_view * const *views = - (const struct pipe_sampler_view **) vec->view[shader_type].states; + const struct ilo_view_cso * const *views = + (const struct ilo_view_cso **) vec->view[shader_type].states; + struct ilo_state_sampler samplers[ILO_MAX_SAMPLERS]; uint32_t *sampler_state, *border_color_state; - int sampler_count; + int sampler_count, i; bool emit_border_color = false; bool skip = false; @@ -194,16 +193,28 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r, sampler_count <= Elements(vec->sampler[shader_type].cso)); if (emit_border_color) { - int i; - for (i = 0; i < sampler_count; i++) { - border_color_state[i] = (samplers[i]) ? - gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, samplers[i]) : 0; + const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i]; + + border_color_state[i] = (cso) ? 
+ gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, &cso->border) : 0; } } - *sampler_state = gen6_SAMPLER_STATE(r->builder, - samplers, views, border_color_state, sampler_count); + for (i = 0; i < sampler_count; i++) { + const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i]; + + if (cso && views[i]) { + samplers[i] = cso->sampler; + ilo_state_sampler_set_surface(&samplers[i], + r->dev, &views[i]->surface); + } else { + samplers[i] = vec->disabled_sampler; + } + } + + *sampler_state = gen6_SAMPLER_STATE(r->builder, samplers, + border_color_state, sampler_count); } static void @@ -466,10 +477,9 @@ gen6_emit_launch_grid_dynamic_samplers(struct ilo_render *r, { const unsigned shader_type = PIPE_SHADER_COMPUTE; const struct ilo_shader_state *cs = vec->cs; - const struct ilo_sampler_cso * const *samplers = - vec->sampler[shader_type].cso; - const struct pipe_sampler_view * const *views = - (const struct pipe_sampler_view **) vec->view[shader_type].states; + const struct ilo_view_cso * const *views = + (const struct ilo_view_cso **) vec->view[shader_type].states; + struct ilo_state_sampler samplers[ILO_MAX_SAMPLERS]; int sampler_count, i; ILO_DEV_ASSERT(r->dev, 7, 7.5); @@ -480,11 +490,25 @@ gen6_emit_launch_grid_dynamic_samplers(struct ilo_render *r, sampler_count <= Elements(vec->sampler[shader_type].cso)); for (i = 0; i < sampler_count; i++) { - r->state.cs.SAMPLER_BORDER_COLOR_STATE[i] = (samplers[i]) ? - gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, samplers[i]) : 0; + const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i]; + + r->state.cs.SAMPLER_BORDER_COLOR_STATE[i] = (cso) ? 
+ gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, &cso->border) : 0; } - r->state.cs.SAMPLER_STATE = gen6_SAMPLER_STATE(r->builder, samplers, views, + for (i = 0; i < sampler_count; i++) { + const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i]; + + if (cso && views[i]) { + samplers[i] = cso->sampler; + ilo_state_sampler_set_surface(&samplers[i], + r->dev, &views[i]->surface); + } else { + samplers[i] = vec->disabled_sampler; + } + } + + r->state.cs.SAMPLER_STATE = gen6_SAMPLER_STATE(r->builder, samplers, r->state.cs.SAMPLER_BORDER_COLOR_STATE, sampler_count); } diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 060f8ae4453..82fa6696e90 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -37,6 +37,89 @@ #include "ilo_shader.h" #include "ilo_state.h" +static enum gen_mip_filter +ilo_translate_mip_filter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR; + case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE; + default: + assert(!"unknown mipfilter"); + return GEN6_MIPFILTER_NONE; + } +} + +static int +ilo_translate_img_filter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR; + default: + assert(!"unknown sampler filter"); + return GEN6_MAPFILTER_NEAREST; + } +} + +static enum gen_texcoord_mode +ilo_translate_address_wrap(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_CLAMP: return GEN8_TEXCOORDMODE_HALF_BORDER; + case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case 
PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + default: + assert(!"unknown sampler wrap mode"); + return GEN6_TEXCOORDMODE_WRAP; + } +} + +static enum gen_aniso_ratio +ilo_translate_max_anisotropy(unsigned max_anisotropy) +{ + switch (max_anisotropy) { + case 0: case 1: case 2: return GEN6_ANISORATIO_2; + case 3: case 4: return GEN6_ANISORATIO_4; + case 5: case 6: return GEN6_ANISORATIO_6; + case 7: case 8: return GEN6_ANISORATIO_8; + case 9: case 10: return GEN6_ANISORATIO_10; + case 11: case 12: return GEN6_ANISORATIO_12; + case 13: case 14: return GEN6_ANISORATIO_14; + default: return GEN6_ANISORATIO_16; + } +} + +static enum gen_prefilter_op +ilo_translate_shadow_func(unsigned func) +{ + /* + * For PIPE_FUNC_x, the reference value is on the left-hand side of the + * comparison, and 1.0 is returned when the comparison is true. + * + * For GEN6_PREFILTEROP_x, the reference value is on the right-hand side of + * the comparison, and 0.0 is returned when the comparison is true. 
+ */ + switch (func) { + case PIPE_FUNC_NEVER: return GEN6_PREFILTEROP_ALWAYS; + case PIPE_FUNC_LESS: return GEN6_PREFILTEROP_LEQUAL; + case PIPE_FUNC_EQUAL: return GEN6_PREFILTEROP_NOTEQUAL; + case PIPE_FUNC_LEQUAL: return GEN6_PREFILTEROP_LESS; + case PIPE_FUNC_GREATER: return GEN6_PREFILTEROP_GEQUAL; + case PIPE_FUNC_NOTEQUAL: return GEN6_PREFILTEROP_EQUAL; + case PIPE_FUNC_GEQUAL: return GEN6_PREFILTEROP_GREATER; + case PIPE_FUNC_ALWAYS: return GEN6_PREFILTEROP_NEVER; + default: + assert(!"unknown shadow compare function"); + return GEN6_PREFILTEROP_NEVER; + } +} + static void finalize_shader_states(struct ilo_state_vector *vec) { @@ -336,11 +419,105 @@ ilo_create_sampler_state(struct pipe_context *pipe, { const struct ilo_dev *dev = ilo_context(pipe)->dev; struct ilo_sampler_cso *sampler; + struct ilo_state_sampler_info info; + struct ilo_state_sampler_border_info border; - sampler = MALLOC_STRUCT(ilo_sampler_cso); + sampler = CALLOC_STRUCT(ilo_sampler_cso); assert(sampler); - ilo_gpe_init_sampler_cso(dev, state, sampler); + memset(&info, 0, sizeof(info)); + + info.non_normalized = !state->normalized_coords; + if (state->normalized_coords) { + info.lod_bias = state->lod_bias; + info.min_lod = state->min_lod; + info.max_lod = state->max_lod; + + info.mip_filter = ilo_translate_mip_filter(state->min_mip_filter); + } else { + /* work around a bug in util_blitter */ + info.mip_filter = GEN6_MIPFILTER_NONE; + } + + if (state->max_anisotropy) { + info.min_filter = GEN6_MAPFILTER_ANISOTROPIC; + info.mag_filter = GEN6_MAPFILTER_ANISOTROPIC; + } else { + info.min_filter = ilo_translate_img_filter(state->min_img_filter); + info.mag_filter = ilo_translate_img_filter(state->mag_img_filter); + } + + info.max_anisotropy = ilo_translate_max_anisotropy(state->max_anisotropy); + + /* use LOD 0 when no mipmapping (see sampler_set_gen6_SAMPLER_STATE()) */ + if (info.mip_filter == GEN6_MIPFILTER_NONE && info.min_lod > 0.0f) { + info.min_lod = 0.0f; + info.mag_filter = 
info.min_filter; + } + + if (state->seamless_cube_map) { + if (state->min_img_filter == PIPE_TEX_FILTER_NEAREST || + state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) { + info.tcx_ctrl = GEN6_TEXCOORDMODE_CLAMP; + info.tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP; + info.tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP; + } else { + info.tcx_ctrl = GEN6_TEXCOORDMODE_CUBE; + info.tcy_ctrl = GEN6_TEXCOORDMODE_CUBE; + info.tcz_ctrl = GEN6_TEXCOORDMODE_CUBE; + } + } else { + info.tcx_ctrl = ilo_translate_address_wrap(state->wrap_s); + info.tcy_ctrl = ilo_translate_address_wrap(state->wrap_t); + info.tcz_ctrl = ilo_translate_address_wrap(state->wrap_r); + + if (ilo_dev_gen(dev) < ILO_GEN(8)) { + /* + * For nearest filtering, PIPE_TEX_WRAP_CLAMP means + * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, + * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while + * additionally clamping the texture coordinates to [0.0, 1.0]. + * + * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8. The + * clamping has to be taken care of in the shaders. There are two + * filters here, but let the minification one has a say. 
+ */ + const bool clamp_is_to_edge = + (state->min_img_filter == PIPE_TEX_FILTER_NEAREST); + + if (clamp_is_to_edge) { + if (info.tcx_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) + info.tcx_ctrl = GEN6_TEXCOORDMODE_CLAMP; + if (info.tcy_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) + info.tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP; + if (info.tcz_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) + info.tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP; + } else { + if (info.tcx_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) { + info.tcx_ctrl = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler->saturate_s = true; + } + if (info.tcy_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) { + info.tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler->saturate_t = true; + } + if (info.tcz_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) { + info.tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler->saturate_r = true; + } + } + } + } + + if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + info.shadow_func = ilo_translate_shadow_func(state->compare_func); + + ilo_state_sampler_init(&sampler->sampler, dev, &info); + + memset(&border, 0, sizeof(border)); + memcpy(border.rgba.f, state->border_color.f, sizeof(border.rgba.f)); + + ilo_state_sampler_border_init(&sampler->border, dev, &border); return sampler; } @@ -1358,6 +1535,8 @@ ilo_state_vector_init(const struct ilo_dev *dev, ilo_state_surface_init_for_null(&vec->fb.null_rt, dev); ilo_state_zs_init_for_null(&vec->fb.null_zs, dev); + ilo_state_sampler_init_disabled(&vec->disabled_sampler, dev); + util_dynarray_init(&vec->global_binding.bindings); vec->dirty = ILO_DIRTY_ALL; diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 9a7d196a2a6..95dbe73bfdc 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -29,6 +29,7 @@ #define ILO_STATE_H #include "core/ilo_state_3d.h" +#include "core/ilo_state_sampler.h" #include "core/ilo_state_surface.h" #include "core/ilo_state_zs.h" #include "pipe/p_state.h" @@ -135,6 +136,18 @@ struct 
ilo_cbuf_cso { const void *user_buffer; }; +struct ilo_sampler_cso { + struct ilo_state_sampler sampler; + struct ilo_state_sampler_border border; + bool saturate_s; + bool saturate_t; + bool saturate_r; +}; + +struct ilo_sampler_state { + const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS]; +}; + struct ilo_cbuf_state { struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS]; uint32_t enabled_mask; @@ -216,6 +229,8 @@ struct ilo_state_vector { struct ilo_cbuf_state cbuf[PIPE_SHADER_TYPES]; struct ilo_resource_state resource; + struct ilo_state_sampler disabled_sampler; + /* GPGPU */ struct ilo_shader_state *cs; struct ilo_resource_state cs_resource; From ded7d412d04cf702596e91f36ba586b18f1933a2 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 18 May 2015 00:00:37 +0800 Subject: [PATCH 623/834] ilo: embed ilo_state_viewport in ilo_viewport_state --- .../drivers/ilo/core/ilo_builder_3d_bottom.h | 163 ++++-------------- src/gallium/drivers/ilo/core/ilo_state_3d.h | 42 ----- .../drivers/ilo/core/ilo_state_3d_bottom.c | 149 ---------------- src/gallium/drivers/ilo/ilo_blitter.h | 4 +- src/gallium/drivers/ilo/ilo_blitter_pipe.c | 2 +- .../drivers/ilo/ilo_blitter_rectlist.c | 10 +- src/gallium/drivers/ilo/ilo_render.c | 8 + src/gallium/drivers/ilo/ilo_render_dynamic.c | 34 ++-- src/gallium/drivers/ilo/ilo_render_gen.h | 2 + src/gallium/drivers/ilo/ilo_render_gen6.c | 15 +- src/gallium/drivers/ilo/ilo_state.c | 65 +++++-- src/gallium/drivers/ilo/ilo_state.h | 15 +- 12 files changed, 141 insertions(+), 368 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index c49f4e470e7..0c0403fb73f 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -35,6 +35,7 @@ #include "ilo_core.h" #include "ilo_dev.h" #include "ilo_format.h" +#include "ilo_state_viewport.h" #include "ilo_builder.h" #include "ilo_builder_3d_top.h" @@ -1452,34 
+1453,24 @@ gen7_3DSTATE_BLEND_STATE_POINTERS(struct ilo_builder *builder, static inline uint32_t gen6_CLIP_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) + const struct ilo_state_viewport *vp) { const int state_align = 32; - const int state_len = 4 * num_viewports; + const int state_len = 4 * vp->count; uint32_t state_offset, *dw; - unsigned i; + int i; ILO_DEV_ASSERT(builder->dev, 6, 6); - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 193: - * - * "The viewport-related state is stored as an array of up to 16 - * elements..." - */ - assert(num_viewports && num_viewports <= 16); - state_offset = ilo_builder_dynamic_pointer(builder, ILO_BUILDER_ITEM_CLIP_VIEWPORT, state_align, state_len, &dw); - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->min_gbx); - dw[1] = fui(vp->max_gbx); - dw[2] = fui(vp->min_gby); - dw[3] = fui(vp->max_gby); + for (i = 0; i < vp->count; i++) { + /* see viewport_matrix_set_gen7_SF_CLIP_VIEWPORT() */ + dw[0] = vp->sf_clip[i][8]; + dw[1] = vp->sf_clip[i][9]; + dw[2] = vp->sf_clip[i][10]; + dw[3] = vp->sf_clip[i][11]; dw += 4; } @@ -1489,38 +1480,21 @@ gen6_CLIP_VIEWPORT(struct ilo_builder *builder, static inline uint32_t gen6_SF_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) + const struct ilo_state_viewport *vp) { const int state_align = 32; - const int state_len = 8 * num_viewports; + const int state_len = 8 * vp->count; uint32_t state_offset, *dw; - unsigned i; + int i; ILO_DEV_ASSERT(builder->dev, 6, 6); - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 262: - * - * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is - * stored as an array of up to 16 elements..." 
- */ - assert(num_viewports && num_viewports <= 16); - state_offset = ilo_builder_dynamic_pointer(builder, ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->m00); - dw[1] = fui(vp->m11); - dw[2] = fui(vp->m22); - dw[3] = fui(vp->m30); - dw[4] = fui(vp->m31); - dw[5] = fui(vp->m32); - dw[6] = 0; - dw[7] = 0; + for (i = 0; i < vp->count; i++) { + /* see viewport_matrix_set_gen7_SF_CLIP_VIEWPORT() */ + memcpy(dw, vp->sf_clip[i], sizeof(*dw) * 8); dw += 8; } @@ -1530,121 +1504,44 @@ gen6_SF_VIEWPORT(struct ilo_builder *builder, static inline uint32_t gen7_SF_CLIP_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) + const struct ilo_state_viewport *vp) { const int state_align = 64; - const int state_len = 16 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; + const int state_len = 16 * vp->count; ILO_DEV_ASSERT(builder->dev, 7, 8); - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 270: - * - * "The viewport-specific state used by both the SF and CL units - * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each - * of which contains the DWords described below. The start of each - * element is spaced 16 DWords apart. The location of first element of - * the array, as specified by both Pointer to SF_VIEWPORT and Pointer - * to CLIP_VIEWPORT, is aligned to a 64-byte boundary." 
- */ - assert(num_viewports && num_viewports <= 16); - - state_offset = ilo_builder_dynamic_pointer(builder, - ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->m00); - dw[1] = fui(vp->m11); - dw[2] = fui(vp->m22); - dw[3] = fui(vp->m30); - dw[4] = fui(vp->m31); - dw[5] = fui(vp->m32); - dw[6] = 0; - dw[7] = 0; - - dw[8] = fui(vp->min_gbx); - dw[9] = fui(vp->max_gbx); - dw[10] = fui(vp->min_gby); - dw[11] = fui(vp->max_gby); - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { - dw[12] = fui(vp->min_x); - dw[13] = fui(vp->max_x - 1.0f); - dw[14] = fui(vp->min_y); - dw[15] = fui(vp->max_y - 1.0f); - } else { - dw[12] = 0; - dw[13] = 0; - dw[14] = 0; - dw[15] = 0; - } - - dw += 16; - } - - return state_offset; + /* see viewport_matrix_set_gen7_SF_CLIP_VIEWPORT() */ + return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_SF_VIEWPORT, + state_align, state_len, (const uint32_t *) vp->sf_clip); } static inline uint32_t gen6_CC_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) + const struct ilo_state_viewport *vp) { const int state_align = 32; - const int state_len = 2 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; + const int state_len = 2 * vp->count; ILO_DEV_ASSERT(builder->dev, 6, 8); - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 385: - * - * "The viewport state is stored as an array of up to 16 elements..." 
- */ - assert(num_viewports && num_viewports <= 16); - - state_offset = ilo_builder_dynamic_pointer(builder, - ILO_BUILDER_ITEM_CC_VIEWPORT, state_align, state_len, &dw); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->min_z); - dw[1] = fui(vp->max_z); - - dw += 2; - } - - return state_offset; + /* see viewport_matrix_set_gen6_CC_VIEWPORT() */ + return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_CC_VIEWPORT, + state_align, state_len, (const uint32_t *) vp->cc); } static inline uint32_t gen6_SCISSOR_RECT(struct ilo_builder *builder, - const struct ilo_scissor_state *scissor, - unsigned num_viewports) + const struct ilo_state_viewport *vp) { const int state_align = 32; - const int state_len = 2 * num_viewports; + const int state_len = 2 * vp->count; ILO_DEV_ASSERT(builder->dev, 6, 8); - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 263: - * - * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is - * stored as an array of up to 16 elements..." 
- */ - assert(num_viewports && num_viewports <= 16); - assert(Elements(scissor->payload) >= state_len); - + /* see viewport_scissor_set_gen6_SCISSOR_RECT() */ return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_SCISSOR_RECT, - state_align, state_len, scissor->payload); + state_align, state_len, (const uint32_t *) vp->scissor); } static inline uint32_t diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index 260f94bf766..9d9dd29831f 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -101,32 +101,6 @@ struct ilo_so_state { bool enabled; }; -struct ilo_viewport_cso { - /* matrix form */ - float m00, m11, m22, m30, m31, m32; - - /* guardband in NDC space */ - float min_gbx, min_gby, max_gbx, max_gby; - - /* viewport in screen space */ - float min_x, min_y, min_z; - float max_x, max_y, max_z; -}; - -struct ilo_viewport_state { - struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS]; - unsigned count; - - struct pipe_viewport_state viewport0; -}; - -struct ilo_scissor_state { - /* SCISSOR_RECT */ - uint32_t payload[ILO_MAX_VIEWPORTS * 2]; - - struct pipe_scissor_state scissor0; -}; - struct ilo_rasterizer_clip { /* 3DSTATE_CLIP */ uint32_t payload[3]; @@ -240,22 +214,6 @@ ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev, int comp0, int comp1, int comp2, int comp3, struct ilo_ve_cso *cso); -void -ilo_gpe_set_viewport_cso(const struct ilo_dev *dev, - const struct pipe_viewport_state *state, - struct ilo_viewport_cso *vp); - -void -ilo_gpe_set_scissor(const struct ilo_dev *dev, - unsigned start_slot, - unsigned num_states, - const struct pipe_scissor_state *states, - struct ilo_scissor_state *scissor); - -void -ilo_gpe_set_scissor_null(const struct ilo_dev *dev, - struct ilo_scissor_state *scissor); - void ilo_gpe_init_rasterizer(const struct ilo_dev *dev, const struct pipe_rasterizer_state *state, diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c 
b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c index 31d9a203c5a..67233cf3d07 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c @@ -900,105 +900,6 @@ ilo_gpe_init_fs_cso(const struct ilo_dev *dev, fs_init_cso_gen6(dev, fs, cso); } -static void -viewport_get_guardband(const struct ilo_dev *dev, - int center_x, int center_y, - int *min_gbx, int *max_gbx, - int *min_gby, int *max_gby) -{ - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 234: - * - * "Per-Device Guardband Extents - * - * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1] - * - Maximum Post-Clamp Delta (X or Y): 16K" - * - * "In addition, in order to be correctly rendered, objects must have a - * screenspace bounding box not exceeding 8K in the X or Y direction. - * This additional restriction must also be comprehended by software, - * i.e., enforced by use of clipping." - * - * From the Ivy Bridge PRM, volume 2 part 1, page 248: - * - * "Per-Device Guardband Extents - * - * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1] - * - Maximum Post-Clamp Delta (X or Y): N/A" - * - * "In addition, in order to be correctly rendered, objects must have a - * screenspace bounding box not exceeding 8K in the X or Y direction. - * This additional restriction must also be comprehended by software, - * i.e., enforced by use of clipping." - * - * Combined, the bounding box of any object can not exceed 8K in both - * width and height. - * - * Below we set the guardband as a squre of length 8K, centered at where - * the viewport is. This makes sure all objects passing the GB test are - * valid to the renderer, and those failing the XY clipping have a - * better chance of passing the GB test. - */ - const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 
32768 : 16384; - const int half_len = 8192 / 2; - - /* make sure the guardband is within the valid range */ - if (center_x - half_len < -max_extent) - center_x = -max_extent + half_len; - else if (center_x + half_len > max_extent - 1) - center_x = max_extent - half_len; - - if (center_y - half_len < -max_extent) - center_y = -max_extent + half_len; - else if (center_y + half_len > max_extent - 1) - center_y = max_extent - half_len; - - *min_gbx = (float) (center_x - half_len); - *max_gbx = (float) (center_x + half_len); - *min_gby = (float) (center_y - half_len); - *max_gby = (float) (center_y + half_len); -} - -void -ilo_gpe_set_viewport_cso(const struct ilo_dev *dev, - const struct pipe_viewport_state *state, - struct ilo_viewport_cso *vp) -{ - const float scale_x = fabs(state->scale[0]); - const float scale_y = fabs(state->scale[1]); - const float scale_z = fabs(state->scale[2]); - int min_gbx, max_gbx, min_gby, max_gby; - - ILO_DEV_ASSERT(dev, 6, 8); - - viewport_get_guardband(dev, - (int) state->translate[0], - (int) state->translate[1], - &min_gbx, &max_gbx, &min_gby, &max_gby); - - /* matrix form */ - vp->m00 = state->scale[0]; - vp->m11 = state->scale[1]; - vp->m22 = state->scale[2]; - vp->m30 = state->translate[0]; - vp->m31 = state->translate[1]; - vp->m32 = state->translate[2]; - - /* guardband in NDC space */ - vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x; - vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x; - vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y; - vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y; - - /* viewport in screen space */ - vp->min_x = scale_x * -1.0f + state->translate[0]; - vp->max_x = scale_x * 1.0f + state->translate[0]; - vp->min_y = scale_y * -1.0f + state->translate[1]; - vp->max_y = scale_y * 1.0f + state->translate[1]; - vp->min_z = scale_z * -1.0f + state->translate[2]; - vp->max_z = scale_z * 1.0f + state->translate[2]; -} - /** * Translate a pipe logicop to 
the matching hardware logicop. */ @@ -1674,56 +1575,6 @@ ilo_gpe_init_dsa(const struct ilo_dev *dev, dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value); } -void -ilo_gpe_set_scissor(const struct ilo_dev *dev, - unsigned start_slot, - unsigned num_states, - const struct pipe_scissor_state *states, - struct ilo_scissor_state *scissor) -{ - unsigned i; - - ILO_DEV_ASSERT(dev, 6, 8); - - for (i = 0; i < num_states; i++) { - uint16_t min_x, min_y, max_x, max_y; - - /* both max and min are inclusive in SCISSOR_RECT */ - if (states[i].minx < states[i].maxx && - states[i].miny < states[i].maxy) { - min_x = states[i].minx; - min_y = states[i].miny; - max_x = states[i].maxx - 1; - max_y = states[i].maxy - 1; - } - else { - /* we have to make min greater than max */ - min_x = 1; - min_y = 1; - max_x = 0; - max_y = 0; - } - - scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x; - scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x; - } - - if (!start_slot && num_states) - scissor->scissor0 = states[0]; -} - -void -ilo_gpe_set_scissor_null(const struct ilo_dev *dev, - struct ilo_scissor_state *scissor) -{ - unsigned i; - - for (i = 0; i < Elements(scissor->payload); i += 2) { - scissor->payload[i + 0] = 1 << 16 | 1; - scissor->payload[i + 1] = 0; - } -} - static void fb_set_blend_caps(const struct ilo_dev *dev, enum pipe_format format, diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index 4284f415c1c..1967c485ca5 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -66,7 +66,9 @@ struct ilo_blitter { struct ilo_ve_state ve; struct pipe_draw_info draw; - struct ilo_viewport_cso viewport; + struct ilo_state_viewport vp; + uint32_t vp_data[20]; + struct ilo_dsa_state dsa; struct { diff --git a/src/gallium/drivers/ilo/ilo_blitter_pipe.c b/src/gallium/drivers/ilo/ilo_blitter_pipe.c index c4c02bd3e53..0bfe7827f11 100644 --- 
a/src/gallium/drivers/ilo/ilo_blitter_pipe.c +++ b/src/gallium/drivers/ilo/ilo_blitter_pipe.c @@ -63,7 +63,7 @@ ilo_blitter_pipe_begin(struct ilo_blitter *blitter, util_blitter_save_viewport(b, &vec->viewport.viewport0); if (scissor_enable) - util_blitter_save_scissor(b, &vec->scissor.scissor0); + util_blitter_save_scissor(b, &vec->viewport.scissor0); switch (op) { case ILO_BLITTER_PIPE_BLIT: diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 6d8afed9dca..84100c0f3c7 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -41,7 +41,6 @@ static bool ilo_blitter_set_invariants(struct ilo_blitter *blitter) { struct pipe_vertex_element velem; - struct pipe_viewport_state vp; if (blitter->initialized) return true; @@ -69,16 +68,13 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter) * From the Haswell PRM, volume 7, page 615: * * "The clear value must be between the min and max depth values - * (inclusive) defined in the CC_VIEWPORT." + * (inclusive) defined in the CC_VIEWPORT." * * Even though clipping and viewport transformation will be disabled, we * still need to set up the viewport states. 
*/ - memset(&vp, 0, sizeof(vp)); - vp.scale[0] = 1.0f; - vp.scale[1] = 1.0f; - vp.scale[2] = 1.0f; - ilo_gpe_set_viewport_cso(blitter->ilo->dev, &vp, &blitter->viewport); + ilo_state_viewport_init_for_rectlist(&blitter->vp, blitter->ilo->dev, + blitter->vp_data, sizeof(blitter->vp_data)); blitter->initialized = true; diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c index f5be3360f05..92898704cde 100644 --- a/src/gallium/drivers/ilo/ilo_render.c +++ b/src/gallium/drivers/ilo/ilo_render.c @@ -447,11 +447,19 @@ draw_session_prepare(struct ilo_render *render, session->prim_changed = true; session->primitive_restart_changed = true; + + ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev, + &session->vp_delta); } else { session->prim_changed = (render->state.reduced_prim != session->reduced_prim); session->primitive_restart_changed = (render->state.primitive_restart != vec->draw->primitive_restart); + + if (vec->dirty & ILO_DIRTY_VIEWPORT) { + ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev, + &session->vp_delta); + } } } diff --git a/src/gallium/drivers/ilo/ilo_render_dynamic.c b/src/gallium/drivers/ilo/ilo_render_dynamic.c index d7822281611..cc3791eb470 100644 --- a/src/gallium/drivers/ilo/ilo_render_dynamic.c +++ b/src/gallium/drivers/ilo/ilo_render_dynamic.c @@ -42,16 +42,14 @@ gen6_emit_draw_dynamic_viewports(struct ilo_render *r, { ILO_DEV_ASSERT(r->dev, 6, 6); - /* SF_VIEWPORT, CLIP_VIEWPORT, and CC_VIEWPORT */ - if (DIRTY(VIEWPORT)) { + /* CLIP_VIEWPORT, SF_VIEWPORT, and CC_VIEWPORT */ + if ((session->vp_delta.dirty & (ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT | + ILO_STATE_VIEWPORT_CC_VIEWPORT)) || + r->state_bo_changed) { r->state.CLIP_VIEWPORT = gen6_CLIP_VIEWPORT(r->builder, - vec->viewport.cso, vec->viewport.count); - - r->state.SF_VIEWPORT = gen6_SF_VIEWPORT(r->builder, - vec->viewport.cso, vec->viewport.count); - - r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder, - vec->viewport.cso, 
vec->viewport.count); + &vec->viewport.vp); + r->state.SF_VIEWPORT = gen6_SF_VIEWPORT(r->builder, &vec->viewport.vp); + r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder, &vec->viewport.vp); session->viewport_changed = true; } @@ -65,12 +63,12 @@ gen7_emit_draw_dynamic_viewports(struct ilo_render *r, ILO_DEV_ASSERT(r->dev, 7, 8); /* SF_CLIP_VIEWPORT and CC_VIEWPORT */ - if (DIRTY(VIEWPORT)) { + if ((session->vp_delta.dirty & (ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT | + ILO_STATE_VIEWPORT_CC_VIEWPORT)) || + r->state_bo_changed) { r->state.SF_CLIP_VIEWPORT = gen7_SF_CLIP_VIEWPORT(r->builder, - vec->viewport.cso, vec->viewport.count); - - r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder, - vec->viewport.cso, vec->viewport.count); + &vec->viewport.vp); + r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder, &vec->viewport.vp); session->viewport_changed = true; } @@ -84,10 +82,10 @@ gen6_emit_draw_dynamic_scissors(struct ilo_render *r, ILO_DEV_ASSERT(r->dev, 6, 8); /* SCISSOR_RECT */ - if (DIRTY(SCISSOR) || DIRTY(VIEWPORT)) { - /* there should be as many scissors as there are viewports */ + if ((session->vp_delta.dirty & ILO_STATE_VIEWPORT_SCISSOR_RECT) || + r->state_bo_changed) { r->state.SCISSOR_RECT = gen6_SCISSOR_RECT(r->builder, - &vec->scissor, vec->viewport.count); + &vec->viewport.vp); session->scissor_changed = true; } @@ -463,7 +461,7 @@ ilo_render_emit_rectlist_dynamic_states(struct ilo_render *render, if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) { render->state.CC_VIEWPORT = - gen6_CC_VIEWPORT(render->builder, &blitter->viewport, 1); + gen6_CC_VIEWPORT(render->builder, &blitter->vp); } assert(ilo_builder_dynamic_used(render->builder) <= dynamic_used + diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index acfe8be3088..5de41623214 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -144,6 +144,8 @@ struct ilo_render_draw_session { bool prim_changed; bool 
primitive_restart_changed; + struct ilo_state_viewport_delta vp_delta; + /* dynamic states */ bool viewport_changed; bool scissor_changed; diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index f3f8ae4a088..31198723367 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -643,11 +643,18 @@ gen6_draw_clip(struct ilo_render *r, * unless we emulate viewport extent test on them. */ if (ilo_dev_gen(r->dev) < ILO_GEN(8)) { - for (i = 0; i < vec->viewport.count; i++) { - const struct ilo_viewport_cso *vp = &vec->viewport.cso[i]; + for (i = 0; i < vec->viewport.params.count; i++) { + const struct ilo_state_viewport_matrix_info *mat = + &vec->viewport.matrices[i]; + float min_x, max_x, min_y, max_y; - if (vp->min_x > 0.0f || vp->max_x < vec->fb.state.width || - vp->min_y > 0.0f || vp->max_y < vec->fb.state.height) { + min_x = -1.0f * fabsf(mat->scale[0]) + mat->translate[0]; + max_x = 1.0f * fabsf(mat->scale[0]) + mat->translate[0]; + min_y = -1.0f * fabsf(mat->scale[1]) + mat->translate[1]; + max_y = 1.0f * fabsf(mat->scale[1]) + mat->translate[1]; + + if (min_x > 0.0f || max_x < vec->fb.state.width || + min_y > 0.0f || max_y < vec->fb.state.height) { enable_guardband = false; break; } diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 82fa6696e90..d4d12ca8431 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -330,6 +330,22 @@ finalize_vertex_elements(struct ilo_context *ilo) } } +static void +finalize_viewport(struct ilo_context *ilo) +{ + const struct ilo_dev *dev = ilo->dev; + struct ilo_state_vector *vec = &ilo->state_vector; + + if (vec->dirty & ILO_DIRTY_VIEWPORT) { + ilo_state_viewport_set_params(&vec->viewport.vp, + dev, &vec->viewport.params, false); + } else if (vec->dirty & ILO_DIRTY_SCISSOR) { + ilo_state_viewport_set_params(&vec->viewport.vp, + dev, &vec->viewport.params, 
true); + vec->dirty |= ILO_DIRTY_VIEWPORT; + } +} + /** * Finalize states. Some states depend on other states and are * incomplete/invalid until finalized. @@ -345,6 +361,8 @@ ilo_finalize_3d_states(struct ilo_context *ilo, finalize_index_buffer(ilo); finalize_vertex_elements(ilo); + finalize_viewport(ilo); + u_upload_unmap(ilo->uploader); } @@ -933,11 +951,26 @@ ilo_set_scissor_states(struct pipe_context *pipe, unsigned num_scissors, const struct pipe_scissor_state *scissors) { - const struct ilo_dev *dev = ilo_context(pipe)->dev; struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector; + unsigned i; - ilo_gpe_set_scissor(dev, start_slot, num_scissors, - scissors, &vec->scissor); + for (i = 0; i < num_scissors; i++) { + struct ilo_state_viewport_scissor_info *info = + &vec->viewport.scissors[start_slot + i]; + + if (scissors[i].minx < scissors[i].maxx && + scissors[i].miny < scissors[i].maxy) { + info->min_x = scissors[i].minx; + info->min_y = scissors[i].miny; + info->max_x = scissors[i].maxx - 1; + info->max_y = scissors[i].maxy - 1; + } else { + info->min_x = 1; + info->min_y = 1; + info->max_x = 0; + info->max_y = 0; + } + } vec->dirty |= ILO_DIRTY_SCISSOR; } @@ -948,28 +981,31 @@ ilo_set_viewport_states(struct pipe_context *pipe, unsigned num_viewports, const struct pipe_viewport_state *viewports) { - const struct ilo_dev *dev = ilo_context(pipe)->dev; struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector; if (viewports) { unsigned i; for (i = 0; i < num_viewports; i++) { - ilo_gpe_set_viewport_cso(dev, &viewports[i], - &vec->viewport.cso[start_slot + i]); + struct ilo_state_viewport_matrix_info *info = + &vec->viewport.matrices[start_slot + i]; + + memcpy(info->scale, viewports[i].scale, sizeof(info->scale)); + memcpy(info->translate, viewports[i].translate, + sizeof(info->translate)); } - if (vec->viewport.count < start_slot + num_viewports) - vec->viewport.count = start_slot + num_viewports; + if (vec->viewport.params.count < start_slot + 
num_viewports) + vec->viewport.params.count = start_slot + num_viewports; /* need to save viewport 0 for util_blitter */ if (!start_slot && num_viewports) vec->viewport.viewport0 = viewports[0]; } else { - if (vec->viewport.count <= start_slot + num_viewports && - vec->viewport.count > start_slot) - vec->viewport.count = start_slot; + if (vec->viewport.params.count <= start_slot + num_viewports && + vec->viewport.params.count > start_slot) + vec->viewport.params.count = start_slot; } vec->dirty |= ILO_DIRTY_VIEWPORT; @@ -1530,7 +1566,12 @@ void ilo_state_vector_init(const struct ilo_dev *dev, struct ilo_state_vector *vec) { - ilo_gpe_set_scissor_null(dev, &vec->scissor); + ilo_state_viewport_init_data_only(&vec->viewport.vp, dev, + vec->viewport.vp_data, sizeof(vec->viewport.vp_data)); + assert(vec->viewport.vp.array_size >= ILO_MAX_VIEWPORTS); + + vec->viewport.params.matrices = vec->viewport.matrices; + vec->viewport.params.scissors = vec->viewport.scissors; ilo_state_surface_init_for_null(&vec->fb.null_rt, dev); ilo_state_zs_init_for_null(&vec->fb.null_zs, dev); diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 95dbe73bfdc..5541c40ba18 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -31,6 +31,7 @@ #include "core/ilo_state_3d.h" #include "core/ilo_state_sampler.h" #include "core/ilo_state_surface.h" +#include "core/ilo_state_viewport.h" #include "core/ilo_state_zs.h" #include "pipe/p_state.h" #include "util/u_dynarray.h" @@ -169,6 +170,18 @@ struct ilo_view_state { unsigned count; }; +struct ilo_viewport_state { + struct ilo_state_viewport_matrix_info matrices[ILO_MAX_VIEWPORTS]; + struct ilo_state_viewport_scissor_info scissors[ILO_MAX_VIEWPORTS]; + struct ilo_state_viewport_params_info params; + + struct pipe_viewport_state viewport0; + struct pipe_scissor_state scissor0; + + struct ilo_state_viewport vp; + uint32_t vp_data[20 * ILO_MAX_VIEWPORTS]; +}; + struct 
ilo_global_binding_cso { struct pipe_resource *resource; uint32_t *handle; @@ -208,8 +221,8 @@ struct ilo_state_vector { struct ilo_so_state so; struct pipe_clip_state clip; + struct ilo_viewport_state viewport; - struct ilo_scissor_state scissor; const struct ilo_rasterizer_state *rasterizer; struct pipe_poly_stipple poly_stipple; From 402e155cd3a757a583f81fa6545c855b63947e7c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 5 Jun 2015 10:23:24 +0800 Subject: [PATCH 624/834] ilo: embed ilo_state_raster in ilo_rasterizer_state --- .../drivers/ilo/core/ilo_builder_3d_bottom.h | 470 +++---------- src/gallium/drivers/ilo/core/ilo_state_3d.h | 47 +- .../drivers/ilo/core/ilo_state_3d_bottom.c | 626 +----------------- src/gallium/drivers/ilo/ilo_blitter.h | 11 +- .../drivers/ilo/ilo_blitter_rectlist.c | 35 +- src/gallium/drivers/ilo/ilo_render.c | 10 + src/gallium/drivers/ilo/ilo_render_gen.h | 4 + src/gallium/drivers/ilo/ilo_render_gen6.c | 92 +-- src/gallium/drivers/ilo/ilo_render_gen7.c | 90 ++- src/gallium/drivers/ilo/ilo_render_gen8.c | 55 +- src/gallium/drivers/ilo/ilo_state.c | 189 +++++- src/gallium/drivers/ilo/ilo_state.h | 12 +- 12 files changed, 456 insertions(+), 1185 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 0c0403fb73f..2d7b9e0035f 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -35,100 +35,27 @@ #include "ilo_core.h" #include "ilo_dev.h" #include "ilo_format.h" +#include "ilo_state_raster.h" #include "ilo_state_viewport.h" #include "ilo_builder.h" #include "ilo_builder_3d_top.h" static inline void gen6_3DSTATE_CLIP(struct ilo_builder *builder, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - bool enable_guardband, - int num_viewports) -{ - const uint8_t cmd_len = 4; - uint32_t dw1, dw2, dw3, *dw; - int interps; - - 
ILO_DEV_ASSERT(builder->dev, 6, 8); - - dw1 = rasterizer->clip.payload[0]; - dw2 = rasterizer->clip.payload[1]; - dw3 = rasterizer->clip.payload[2]; - - if (enable_guardband && rasterizer->clip.can_enable_guardband) - dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE; - - interps = (fs) ? ilo_shader_get_kernel_param(fs, - ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0; - - if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL | - GEN6_INTERP_NONPERSPECTIVE_CENTROID | - GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) - dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE; - - dw3 |= GEN6_CLIP_DW3_FORCE_RTAINDEX_ZERO | - (num_viewports - 1); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2); - dw[1] = dw1; - dw[2] = dw2; - dw[3] = dw3; -} - -static inline void -gen6_disable_3DSTATE_CLIP(struct ilo_builder *builder) + const struct ilo_state_raster *rs) { const uint8_t cmd_len = 4; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 6, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 8); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2); - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; -} - -static inline void -gen7_internal_3dstate_sf(struct ilo_builder *builder, - uint8_t cmd_len, uint32_t *dw, - const struct ilo_rasterizer_sf *sf, - int num_samples) -{ - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - assert(cmd_len == 7); - - dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); - - if (!sf) { - dw[1] = 0; - dw[2] = (num_samples > 1) ? 
(GEN6_MSRASTMODE_ON_PATTERN << 8) : 0; - dw[3] = 0; - dw[4] = 0; - dw[5] = 0; - dw[6] = 0; - - return; - } - - /* see rasterizer_init_sf_gen6() */ - STATIC_ASSERT(Elements(sf->payload) >= 3); - dw[1] = sf->payload[0]; - dw[2] = sf->payload[1]; - dw[3] = sf->payload[2]; - - if (num_samples > 1) - dw[2] |= sf->dw_msaa; - - dw[4] = sf->dw_depth_offset_const; - dw[5] = sf->dw_depth_offset_scale; - dw[6] = sf->dw_depth_offset_clamp; + /* see raster_set_gen6_3DSTATE_CLIP() */ + dw[1] = rs->clip[0]; + dw[2] = rs->clip[1]; + dw[3] = rs->clip[2]; } static inline void @@ -232,34 +159,34 @@ gen8_internal_3dstate_sbe_swiz(struct ilo_builder *builder, static inline void gen6_3DSTATE_SF(struct ilo_builder *builder, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - int sample_count) + const struct ilo_state_raster *rs, + unsigned sprite_coord_mode, + const struct ilo_shader_state *fs) { const uint8_t cmd_len = 20; uint32_t gen8_3dstate_sbe[4], gen8_3dstate_sbe_swiz[11]; - uint32_t gen7_3dstate_sf[7]; - const struct ilo_rasterizer_sf *sf; - int sprite_coord_mode; uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 6); - sf = (rasterizer) ? &rasterizer->sf : NULL; - sprite_coord_mode = (rasterizer) ? 
rasterizer->state.sprite_coord_mode : 0; - gen8_internal_3dstate_sbe(builder, Elements(gen8_3dstate_sbe), gen8_3dstate_sbe, fs, sprite_coord_mode); gen8_internal_3dstate_sbe_swiz(builder, Elements(gen8_3dstate_sbe_swiz), gen8_3dstate_sbe_swiz, fs); - gen7_internal_3dstate_sf(builder, Elements(gen7_3dstate_sf), - gen7_3dstate_sf, sf, sample_count); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); dw[1] = gen8_3dstate_sbe[1]; - memcpy(&dw[2], &gen7_3dstate_sf[1], sizeof(*dw) * 6); + + /* see raster_set_gen7_3DSTATE_SF() */ + dw[2] = rs->sf[0]; + dw[3] = rs->sf[1]; + dw[4] = rs->sf[2]; + dw[5] = rs->raster[1]; + dw[6] = rs->raster[2]; + dw[7] = rs->raster[3]; + memcpy(&dw[8], &gen8_3dstate_sbe_swiz[1], sizeof(*dw) * 8); dw[16] = gen8_3dstate_sbe[2]; dw[17] = gen8_3dstate_sbe[3]; @@ -269,63 +196,26 @@ gen6_3DSTATE_SF(struct ilo_builder *builder, static inline void gen7_3DSTATE_SF(struct ilo_builder *builder, - const struct ilo_rasterizer_sf *sf, - enum pipe_format zs_format, - int sample_count) + const struct ilo_state_raster *rs) { - const uint8_t cmd_len = 7; + const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 
4 : 7; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - gen7_internal_3dstate_sf(builder, cmd_len, dw, sf, sample_count); - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { - int hw_format; - - /* separate stencil */ - switch (zs_format) { - case PIPE_FORMAT_Z16_UNORM: - hw_format = GEN6_ZFORMAT_D16_UNORM; - break; - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - hw_format = GEN6_ZFORMAT_D32_FLOAT; - break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - hw_format = GEN6_ZFORMAT_D24_UNORM_X8_UINT; - break; - default: - /* FLOAT surface is assumed when there is no depth buffer */ - hw_format = GEN6_ZFORMAT_D32_FLOAT; - break; - } - - dw[1] |= hw_format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT; - } -} - -static inline void -gen8_3DSTATE_SF(struct ilo_builder *builder, - const struct ilo_rasterizer_sf *sf) -{ - const uint8_t cmd_len = 4; - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 8, 8); + ILO_DEV_ASSERT(builder->dev, 7, 8); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); - /* see rasterizer_init_sf_gen8() */ - STATIC_ASSERT(Elements(sf->payload) >= 3); - dw[1] = sf->payload[0]; - dw[2] = sf->payload[1]; - dw[3] = sf->payload[2]; + /* see raster_set_gen7_3DSTATE_SF() or raster_set_gen8_3DSTATE_SF() */ + dw[1] = rs->sf[0]; + dw[2] = rs->sf[1]; + dw[3] = rs->sf[2]; + if (ilo_dev_gen(builder->dev) < ILO_GEN(8)) { + dw[4] = rs->raster[1]; + dw[5] = rs->raster[2]; + dw[6] = rs->raster[3]; + } } static inline void @@ -386,7 +276,7 @@ gen8_3DSTATE_SBE_SWIZ(struct ilo_builder *builder, static inline void gen8_3DSTATE_RASTER(struct ilo_builder *builder, - const struct ilo_rasterizer_sf *sf) + const struct ilo_state_raster *rs) { const uint8_t cmd_len = 5; uint32_t *dw; @@ -396,39 +286,47 @@ gen8_3DSTATE_RASTER(struct ilo_builder *builder, ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = 
GEN8_RENDER_CMD(3D, 3DSTATE_RASTER) | (cmd_len - 2); - dw[1] = sf->dw_raster; - dw[2] = sf->dw_depth_offset_const; - dw[3] = sf->dw_depth_offset_scale; - dw[4] = sf->dw_depth_offset_clamp; + /* see raster_set_gen8_3DSTATE_RASTER() */ + dw[1] = rs->raster[0]; + dw[2] = rs->raster[1]; + dw[3] = rs->raster[2]; + dw[4] = rs->raster[3]; } static inline void gen6_3DSTATE_WM(struct ilo_builder *builder, + const struct ilo_state_raster *rs, const struct ilo_shader_state *fs, - const struct ilo_rasterizer_state *rasterizer, bool dual_blend, bool cc_may_kill) { const uint8_t cmd_len = 9; + const bool multisample = false; const int num_samples = 1; - const struct ilo_shader_cso *cso; uint32_t dw2, dw4, dw5, dw6, *dw; ILO_DEV_ASSERT(builder->dev, 6, 6); - cso = ilo_shader_get_kernel_cso(fs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - dw6 = cso->payload[3]; + dw2 = 0; + /* see raster_set_gen6_3dstate_wm() */ + dw4 = rs->raster[0]; + dw5 = rs->raster[1]; + dw6 = rs->raster[2]; - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 248: - * - * "This bit (Statistics Enable) must be disabled if either of these - * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve - * Enable or Depth Buffer Resolve Enable." - */ - dw4 |= GEN6_WM_DW4_STATISTICS; + if (fs) { + const struct ilo_shader_cso *cso; + + cso = ilo_shader_get_kernel_cso(fs); + /* see fs_init_cso_gen6() */ + dw2 |= cso->payload[0]; + dw4 |= cso->payload[1]; + dw5 |= cso->payload[2]; + dw6 |= cso->payload[3]; + } else { + const int max_threads = (builder->dev->gt == 2) ? 
80 : 40; + + /* honor the valid range even if dispatching is disabled */ + dw5 |= (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; + } if (cc_may_kill) dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL | GEN6_WM_DW5_PS_DISPATCH_ENABLE; @@ -436,14 +334,8 @@ gen6_3DSTATE_WM(struct ilo_builder *builder, if (dual_blend) dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; - dw5 |= rasterizer->wm.payload[0]; - - dw6 |= rasterizer->wm.payload[1]; - - if (num_samples > 1) { - dw6 |= rasterizer->wm.dw_msaa_rast | - rasterizer->wm.dw_msaa_disp; - } + if (multisample && num_samples > 1) + dw6 |= GEN6_WM_DW6_MSDISPMODE_PERPIXEL; ilo_builder_batch_pointer(builder, cmd_len, &dw); @@ -458,59 +350,36 @@ gen6_3DSTATE_WM(struct ilo_builder *builder, dw[8] = 0; /* kernel 2 */ } -static inline void -gen6_hiz_3DSTATE_WM(struct ilo_builder *builder, uint32_t hiz_op) -{ - const uint8_t cmd_len = 9; - const int max_threads = (builder->dev->gt == 2) ? 80 : 40; - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw[4] = hiz_op; - /* honor the valid range even if dispatching is disabled */ - dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; - dw[6] = 0; - dw[7] = 0; - dw[8] = 0; -} - static inline void gen7_3DSTATE_WM(struct ilo_builder *builder, + const struct ilo_state_raster *rs, const struct ilo_shader_state *fs, - const struct ilo_rasterizer_state *rasterizer, bool cc_may_kill) { const uint8_t cmd_len = 3; + const bool multisample = false; const int num_samples = 1; - const struct ilo_shader_cso *cso; uint32_t dw1, dw2, *dw; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - /* see rasterizer_init_wm_gen7() */ - dw1 = rasterizer->wm.payload[0]; - dw2 = rasterizer->wm.payload[1]; + /* see raster_set_gen8_3DSTATE_WM() */ + dw1 = rs->wm[0]; - /* see fs_init_cso_gen7() */ - cso = ilo_shader_get_kernel_cso(fs); - dw1 |= cso->payload[3]; + if (fs) { + const 
struct ilo_shader_cso *cso; - dw1 |= GEN7_WM_DW1_STATISTICS; + cso = ilo_shader_get_kernel_cso(fs); + /* see fs_init_cso_gen7() */ + dw1 |= cso->payload[3]; + } if (cc_may_kill) dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE | GEN7_WM_DW1_PS_KILL_PIXEL; - if (num_samples > 1) { - dw1 |= rasterizer->wm.dw_msaa_rast; - dw2 |= rasterizer->wm.dw_msaa_disp; - } + dw2 = 0; + if (multisample && num_samples > 1) + dw2 |= GEN7_WM_DW2_MSDISPMODE_PERPIXEL; ilo_builder_batch_pointer(builder, cmd_len, &dw); @@ -521,43 +390,18 @@ gen7_3DSTATE_WM(struct ilo_builder *builder, static inline void gen8_3DSTATE_WM(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - const struct ilo_rasterizer_state *rasterizer) + const struct ilo_state_raster *rs) { const uint8_t cmd_len = 2; - const struct ilo_shader_cso *cso; - uint32_t dw1, interps, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); - /* see rasterizer_get_wm_gen8() */ - dw1 = rasterizer->wm.payload[0]; - dw1 |= GEN7_WM_DW1_STATISTICS; - - /* see fs_init_cso_gen8() */ - cso = ilo_shader_get_kernel_cso(fs); - interps = cso->payload[4]; - - assert(!(dw1 & interps)); - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); - dw[1] = dw1 | interps; -} - -static inline void -gen7_hiz_3DSTATE_WM(struct ilo_builder *builder, uint32_t hiz_op) -{ - const uint8_t cmd_len = 3; - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 7, 7.5); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); - dw[1] = hiz_op; - dw[2] = 0; + /* see raster_set_gen8_3DSTATE_WM() */ + dw[1] = rs->wm[0]; } static inline void @@ -580,48 +424,24 @@ gen8_3DSTATE_WM_DEPTH_STENCIL(struct ilo_builder *builder, } static inline void -gen8_3DSTATE_WM_HZ_OP(struct ilo_builder *builder, uint32_t op, - uint16_t width, uint16_t height, int sample_count) +gen8_3DSTATE_WM_HZ_OP(struct ilo_builder *builder, + const struct ilo_state_raster *rs, + uint16_t width, 
uint16_t height) { const uint8_t cmd_len = 5; - const uint32_t sample_mask = ((1 << sample_count) - 1) | 0x1; - uint32_t dw1, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); - dw1 = op; - - switch (sample_count) { - case 0: - case 1: - dw1 |= GEN6_NUMSAMPLES_1 << 13; - break; - case 2: - dw1 |= GEN8_NUMSAMPLES_2 << 13; - break; - case 4: - dw1 |= GEN6_NUMSAMPLES_4 << 13; - break; - case 8: - dw1 |= GEN7_NUMSAMPLES_8 << 13; - break; - case 16: - dw1 |= GEN8_NUMSAMPLES_16 << 13; - break; - default: - assert(!"unsupported sample count"); - dw1 |= GEN6_NUMSAMPLES_1 << 13; - break; - } - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_HZ_OP) | (cmd_len - 2); - dw[1] = dw1; + /* see raster_set_gen8_3dstate_wm_hz_op() */ + dw[1] = rs->wm[1]; dw[2] = 0; - /* exclusive? */ + /* exclusive */ dw[3] = height << 16 | width; - dw[4] = sample_mask; + dw[4] = rs->wm[2]; } static inline void @@ -863,94 +683,40 @@ gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(struct ilo_builder *builder, static inline void gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, - int num_samples, const uint32_t *pattern, - bool pixel_location_center) + const struct ilo_state_raster *rs, + const uint32_t *pattern, int pattern_len) { const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3; - const enum gen_pixel_location pixloc = (pixel_location_center) ? 
- GEN6_PIXLOC_CENTER : GEN6_PIXLOC_UL_CORNER; - uint32_t dw1, dw2, dw3, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 7.5); - dw1 = pixloc << 4; - - switch (num_samples) { - case 0: - case 1: - dw1 |= GEN6_NUMSAMPLES_1 << 1; - dw2 = 0; - dw3 = 0; - break; - case 4: - dw1 |= GEN6_NUMSAMPLES_4 << 1; - dw2 = pattern[0]; - dw3 = 0; - break; - case 8: - assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7)); - dw1 |= GEN7_NUMSAMPLES_8 << 1; - dw2 = pattern[0]; - dw3 = pattern[1]; - break; - default: - assert(!"unsupported sample count"); - dw1 |= GEN6_NUMSAMPLES_1 << 1; - dw2 = 0; - dw3 = 0; - break; - } - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (cmd_len - 2); - dw[1] = dw1; - dw[2] = dw2; + /* see raster_set_gen8_3DSTATE_MULTISAMPLE() */ + dw[1] = rs->sample[0]; + + assert(pattern_len == 1 || pattern_len == 2); + dw[2] = pattern[0]; if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) - dw[3] = dw3; + dw[3] = (pattern_len == 2) ? pattern[1] : 0; } static inline void gen8_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, - int num_samples, - bool pixel_location_center) + const struct ilo_state_raster *rs) { const uint8_t cmd_len = 2; - const enum gen_pixel_location pixloc = (pixel_location_center) ? 
- GEN6_PIXLOC_CENTER : GEN6_PIXLOC_UL_CORNER; - uint32_t dw1, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); - dw1 = pixloc << 4; - - switch (num_samples) { - case 0: - case 1: - dw1 |= GEN6_NUMSAMPLES_1 << 1; - break; - case 2: - dw1 |= GEN8_NUMSAMPLES_2 << 1; - break; - case 4: - dw1 |= GEN6_NUMSAMPLES_4 << 1; - break; - case 8: - dw1 |= GEN7_NUMSAMPLES_8 << 1; - break; - case 16: - dw1 |= GEN8_NUMSAMPLES_16 << 1; - break; - default: - assert(!"unsupported sample count"); - dw1 |= GEN6_NUMSAMPLES_1 << 1; - break; - } - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (cmd_len - 2); - dw[1] = dw1; + /* see raster_set_gen8_3DSTATE_MULTISAMPLE() */ + dw[1] = rs->sample[0]; } static inline void @@ -982,48 +748,18 @@ gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_builder *builder, static inline void gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, - unsigned sample_mask) + const struct ilo_state_raster *rs) { const uint8_t cmd_len = 2; - const unsigned valid_mask = 0xf; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 6, 6); - - sample_mask &= valid_mask; + ILO_DEV_ASSERT(builder->dev, 6, 8); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2); - dw[1] = sample_mask; -} - -static inline void -gen7_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, - unsigned sample_mask, - int num_samples) -{ - const uint8_t cmd_len = 2; - const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1; - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 7, 8); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 294: - * - * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field - * (Sample Mask) must be zero. - * - * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field - * must be zero." 
- */ - sample_mask &= valid_mask; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2); - dw[1] = sample_mask; + /* see raster_set_gen6_3DSTATE_SAMPLE_MASK() */ + dw[1] = rs->sample[1]; } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index 9d9dd29831f..78cd67128af 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -101,42 +101,6 @@ struct ilo_so_state { bool enabled; }; -struct ilo_rasterizer_clip { - /* 3DSTATE_CLIP */ - uint32_t payload[3]; - - uint32_t can_enable_guardband; -}; - -struct ilo_rasterizer_sf { - /* 3DSTATE_SF */ - uint32_t payload[3]; - uint32_t dw_msaa; - - /* Global Depth Offset Constant/Scale/Clamp */ - uint32_t dw_depth_offset_const; - uint32_t dw_depth_offset_scale; - uint32_t dw_depth_offset_clamp; - - /* Gen8+ 3DSTATE_RASTER */ - uint32_t dw_raster; -}; - -struct ilo_rasterizer_wm { - /* 3DSTATE_WM */ - uint32_t payload[2]; - uint32_t dw_msaa_rast; - uint32_t dw_msaa_disp; -}; - -struct ilo_rasterizer_state { - struct pipe_rasterizer_state state; - - struct ilo_rasterizer_clip clip; - struct ilo_rasterizer_sf sf; - struct ilo_rasterizer_wm wm; -}; - struct ilo_dsa_state { /* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */ uint32_t payload[3]; @@ -186,6 +150,9 @@ struct ilo_fb_state { struct ilo_state_zs null_zs; struct ilo_fb_blend_caps { + bool is_unorm; + bool is_integer; + bool can_logicop; bool can_blend; bool can_alpha_test; @@ -193,6 +160,10 @@ struct ilo_fb_state { } blend_caps[PIPE_MAX_COLOR_BUFS]; unsigned num_samples; + + bool has_integer_rt; + bool has_hiz; + enum gen_depth_format depth_offset_format; }; struct ilo_shader_cso { @@ -215,10 +186,6 @@ ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev, struct ilo_ve_cso *cso); void -ilo_gpe_init_rasterizer(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, 
- struct ilo_rasterizer_state *rasterizer); -void ilo_gpe_init_dsa(const struct ilo_dev *dev, const struct pipe_depth_stencil_alpha_state *state, struct ilo_dsa_state *dsa); diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c index 67233cf3d07..83da224811e 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c @@ -35,581 +35,12 @@ #include "ilo_state_3d.h" #include "../ilo_shader.h" -static void -rasterizer_init_clip(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_clip *clip) -{ - uint32_t dw1, dw2, dw3; - - ILO_DEV_ASSERT(dev, 6, 8); - - dw1 = GEN6_CLIP_DW1_STATISTICS; - - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 219: - * - * "Workaround : Due to Hardware issue "EarlyCull" needs to be - * enabled only for the cases where the incoming primitive topology - * into the clipper guaranteed to be Trilist." - * - * What does this mean? 
- */ - dw1 |= 0 << 19 | - GEN7_CLIP_DW1_EARLY_CULL_ENABLE; - - if (ilo_dev_gen(dev) < ILO_GEN(8)) { - if (state->front_ccw) - dw1 |= GEN6_FRONTWINDING_CCW << 20; - - switch (state->cull_face) { - case PIPE_FACE_NONE: - dw1 |= GEN6_CULLMODE_NONE << 16; - break; - case PIPE_FACE_FRONT: - dw1 |= GEN6_CULLMODE_FRONT << 16; - break; - case PIPE_FACE_BACK: - dw1 |= GEN6_CULLMODE_BACK << 16; - break; - case PIPE_FACE_FRONT_AND_BACK: - dw1 |= GEN6_CULLMODE_BOTH << 16; - break; - } - } - } - - dw2 = GEN6_CLIP_DW2_CLIP_ENABLE | - GEN6_CLIP_DW2_XY_TEST_ENABLE | - state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT | - GEN6_CLIPMODE_NORMAL << 13; - - if (state->clip_halfz) - dw2 |= GEN6_CLIP_DW2_APIMODE_D3D; - else - dw2 |= GEN6_CLIP_DW2_APIMODE_OGL; - - if (ilo_dev_gen(dev) < ILO_GEN(8) && state->depth_clip) - dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE; - - if (state->flatshade_first) { - dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | - 0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | - 1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; - } - else { - dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | - 1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | - 2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; - } - - dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT | - 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT; - - clip->payload[0] = dw1; - clip->payload[1] = dw2; - clip->payload[2] = dw3; - - clip->can_enable_guardband = true; - - /* - * There are several reasons that guard band test should be disabled - * - * - GL wide points (to avoid partially visibie object) - * - GL wide or AA lines (to avoid partially visibie object) - */ - if (state->point_size_per_vertex || state->point_size > 1.0f) - clip->can_enable_guardband = false; - if (state->line_smooth || state->line_width > 1.0f) - clip->can_enable_guardband = false; -} - -static void -rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_sf *sf) -{ - ILO_DEV_ASSERT(dev, 6, 8); 
- - /* - * Scale the constant term. The minimum representable value used by the HW - * is not large enouch to be the minimum resolvable difference. - */ - sf->dw_depth_offset_const = fui(state->offset_units * 2.0f); - sf->dw_depth_offset_scale = fui(state->offset_scale); - sf->dw_depth_offset_clamp = fui(state->offset_clamp); -} - -static void -rasterizer_init_sf_gen6(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_sf *sf) -{ - int line_width, point_width; - uint32_t dw1, dw2, dw3; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 248: - * - * "This bit (Statistics Enable) should be set whenever clipping is - * enabled and the Statistics Enable bit is set in CLIP_STATE. It - * should be cleared if clipping is disabled or Statistics Enable in - * CLIP_STATE is clear." - */ - dw1 = GEN7_SF_DW1_STATISTICS | - GEN7_SF_DW1_VIEWPORT_TRANSFORM; - - /* XXX GEN6 path seems to work fine for GEN7 */ - if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) { - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 258: - * - * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset - * Enable Solid , Global Depth Offset Enable Wireframe, and Global - * Depth Offset Enable Point) should be set whenever non zero depth - * bias (Slope, Bias) values are used. Setting this bit may have - * some degradation of performance for some workloads." 
- */ - if (state->offset_tri || state->offset_line || state->offset_point) { - /* XXX need to scale offset_const according to the depth format */ - dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET; - - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID | - GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME | - GEN7_SF_DW1_DEPTH_OFFSET_POINT; - } - } else { - if (state->offset_tri) - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID; - if (state->offset_line) - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME; - if (state->offset_point) - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT; - } - - switch (state->fill_front) { - case PIPE_POLYGON_MODE_FILL: - dw1 |= GEN6_FILLMODE_SOLID << 5; - break; - case PIPE_POLYGON_MODE_LINE: - dw1 |= GEN6_FILLMODE_WIREFRAME << 5; - break; - case PIPE_POLYGON_MODE_POINT: - dw1 |= GEN6_FILLMODE_POINT << 5; - break; - } - - switch (state->fill_back) { - case PIPE_POLYGON_MODE_FILL: - dw1 |= GEN6_FILLMODE_SOLID << 3; - break; - case PIPE_POLYGON_MODE_LINE: - dw1 |= GEN6_FILLMODE_WIREFRAME << 3; - break; - case PIPE_POLYGON_MODE_POINT: - dw1 |= GEN6_FILLMODE_POINT << 3; - break; - } - - if (state->front_ccw) - dw1 |= GEN6_FRONTWINDING_CCW; - - dw2 = 0; - - if (state->line_smooth) { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 251: - * - * "This field (Anti-aliasing Enable) must be disabled if any of the - * render targets have integer (UINT or SINT) surface format." - * - * From the Sandy Bridge PRM, volume 2 part 1, page 317: - * - * "This field (Hierarchical Depth Buffer Enable) must be disabled - * if Anti-aliasing Enable in 3DSTATE_SF is enabled. - * - * TODO We do not check those yet. 
- */ - dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE | - GEN7_SF_DW2_AA_LINE_CAP_1_0; - } - - switch (state->cull_face) { - case PIPE_FACE_NONE: - dw2 |= GEN6_CULLMODE_NONE << 29; - break; - case PIPE_FACE_FRONT: - dw2 |= GEN6_CULLMODE_FRONT << 29; - break; - case PIPE_FACE_BACK: - dw2 |= GEN6_CULLMODE_BACK << 29; - break; - case PIPE_FACE_FRONT_AND_BACK: - dw2 |= GEN6_CULLMODE_BOTH << 29; - break; - } - - /* - * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1) - * pixels in the minor direction. We have to make the lines slightly - * thicker, 0.5 pixel on both sides, so that they intersect that many - * pixels are considered into the lines. - * - * Line width is in U3.7. - */ - line_width = (int) - ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f); - line_width = CLAMP(line_width, 0, 1023); - - /* use GIQ rules */ - if (line_width == 128 && !state->line_smooth) - line_width = 0; - - dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; - - if (ilo_dev_gen(dev) == ILO_GEN(7.5) && state->line_stipple_enable) - dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE; - - if (state->scissor) - dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE; - - dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE | - GEN7_SF_DW3_SUBPIXEL_8BITS; - - if (state->line_last_pixel) - dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE; - - if (state->flatshade_first) { - dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | - 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | - 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; - } else { - dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | - 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | - 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; - } - - if (!state->point_size_per_vertex) - dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH; - - /* in U8.3 */ - point_width = (int) (state->point_size * 8.0f + 0.5f); - point_width = CLAMP(point_width, 1, 2047); - - dw3 |= point_width; - - STATIC_ASSERT(Elements(sf->payload) >= 3); - sf->payload[0] = dw1; - sf->payload[1] = dw2; - sf->payload[2] = dw3; - - if (state->multisample) { - sf->dw_msaa 
= GEN6_MSRASTMODE_ON_PATTERN << 8; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 251: - * - * "Software must not program a value of 0.0 when running in - * MSRASTMODE_ON_xxx modes - zero-width lines are not available - * when multisampling rasterization is enabled." - */ - if (!line_width) { - line_width = 128; /* 1.0f */ - - sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; - } - } else { - sf->dw_msaa = 0; - } - - rasterizer_init_sf_depth_offset_gen6(dev, state, sf); - /* 3DSTATE_RASTER is Gen8+ only */ - sf->dw_raster = 0; -} - -static uint32_t -rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state) -{ - uint32_t dw = 0; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (state->front_ccw) - dw |= GEN6_FRONTWINDING_CCW << 21; - - switch (state->cull_face) { - case PIPE_FACE_NONE: - dw |= GEN6_CULLMODE_NONE << 16; - break; - case PIPE_FACE_FRONT: - dw |= GEN6_CULLMODE_FRONT << 16; - break; - case PIPE_FACE_BACK: - dw |= GEN6_CULLMODE_BACK << 16; - break; - case PIPE_FACE_FRONT_AND_BACK: - dw |= GEN6_CULLMODE_BOTH << 16; - break; - } - - if (state->point_smooth) - dw |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE; - - if (state->multisample) - dw |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE; - - if (state->offset_tri) - dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID; - if (state->offset_line) - dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME; - if (state->offset_point) - dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT; - - switch (state->fill_front) { - case PIPE_POLYGON_MODE_FILL: - dw |= GEN6_FILLMODE_SOLID << 5; - break; - case PIPE_POLYGON_MODE_LINE: - dw |= GEN6_FILLMODE_WIREFRAME << 5; - break; - case PIPE_POLYGON_MODE_POINT: - dw |= GEN6_FILLMODE_POINT << 5; - break; - } - - switch (state->fill_back) { - case PIPE_POLYGON_MODE_FILL: - dw |= GEN6_FILLMODE_SOLID << 3; - break; - case PIPE_POLYGON_MODE_LINE: - dw |= GEN6_FILLMODE_WIREFRAME << 3; - break; - case PIPE_POLYGON_MODE_POINT: - dw |= GEN6_FILLMODE_POINT << 3; - break; - } 
- - if (state->line_smooth) - dw |= GEN8_RASTER_DW1_AA_LINE_ENABLE; - - if (state->scissor) - dw |= GEN8_RASTER_DW1_SCISSOR_ENABLE; - - if (state->depth_clip) - dw |= GEN8_RASTER_DW1_Z_TEST_ENABLE; - - return dw; -} - -static void -rasterizer_init_sf_gen8(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_sf *sf) -{ - int line_width, point_width; - uint32_t dw1, dw2, dw3; - - ILO_DEV_ASSERT(dev, 8, 8); - - /* in U3.7 */ - line_width = (int) - ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f); - line_width = CLAMP(line_width, 0, 1023); - - /* use GIQ rules */ - if (line_width == 128 && !state->line_smooth) - line_width = 0; - - /* in U8.3 */ - point_width = (int) (state->point_size * 8.0f + 0.5f); - point_width = CLAMP(point_width, 1, 2047); - - dw1 = GEN7_SF_DW1_STATISTICS | - GEN7_SF_DW1_VIEWPORT_TRANSFORM; - - dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; - if (state->line_smooth) - dw2 |= GEN7_SF_DW2_AA_LINE_CAP_1_0; - - dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE | - GEN7_SF_DW3_SUBPIXEL_8BITS | - point_width; - - if (state->line_last_pixel) - dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE; - - if (state->flatshade_first) { - dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | - 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | - 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; - } else { - dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | - 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | - 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; - } - - if (!state->point_size_per_vertex) - dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH; - - dw3 |= point_width; - - STATIC_ASSERT(Elements(sf->payload) >= 3); - sf->payload[0] = dw1; - sf->payload[1] = dw2; - sf->payload[2] = dw3; - - rasterizer_init_sf_depth_offset_gen6(dev, state, sf); - - sf->dw_msaa = 0; - sf->dw_raster = rasterizer_get_sf_raster_gen8(dev, state); -} - -static void -rasterizer_init_wm_gen6(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_wm *wm) -{ - uint32_t 
dw5, dw6; - - ILO_DEV_ASSERT(dev, 6, 6); - - /* only the FF unit states are set, as in GEN7 */ - - dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0; - - /* same value as in 3DSTATE_SF */ - if (state->line_smooth) - dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0; - - if (state->poly_stipple_enable) - dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE; - if (state->line_stipple_enable) - dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE; - - /* - * assertion that makes sure - * - * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp; - * - * is valid - */ - STATIC_ASSERT(GEN6_MSRASTMODE_OFF_PIXEL == 0 && - GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0); - dw6 = GEN6_ZW_INTERP_PIXEL << GEN6_WM_DW6_ZW_INTERP__SHIFT; - - if (state->bottom_edge_rule) - dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT; - - wm->dw_msaa_rast = - (state->multisample) ? (GEN6_MSRASTMODE_ON_PATTERN << 1) : 0; - wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL; - - STATIC_ASSERT(Elements(wm->payload) >= 2); - wm->payload[0] = dw5; - wm->payload[1] = dw6; -} - -static void -rasterizer_init_wm_gen7(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_wm *wm) -{ - uint32_t dw1, dw2; - - ILO_DEV_ASSERT(dev, 7, 7.5); - - /* - * assertion that makes sure - * - * dw1 |= wm->dw_msaa_rast; - * dw2 |= wm->dw_msaa_disp; - * - * is valid - */ - STATIC_ASSERT(GEN6_MSRASTMODE_OFF_PIXEL == 0 && - GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0); - dw1 = GEN6_ZW_INTERP_PIXEL << GEN7_WM_DW1_ZW_INTERP__SHIFT | - GEN7_WM_DW1_AA_LINE_WIDTH_2_0; - dw2 = 0; - - /* same value as in 3DSTATE_SF */ - if (state->line_smooth) - dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0; - - if (state->poly_stipple_enable) - dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE; - if (state->line_stipple_enable) - dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE; - - if (state->bottom_edge_rule) - dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; - - wm->dw_msaa_rast = - (state->multisample) ? 
GEN6_MSRASTMODE_ON_PATTERN : 0; - wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL; - - STATIC_ASSERT(Elements(wm->payload) >= 2); - wm->payload[0] = dw1; - wm->payload[1] = dw2; -} - -static uint32_t -rasterizer_get_wm_gen8(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - dw = GEN6_ZW_INTERP_PIXEL << GEN7_WM_DW1_ZW_INTERP__SHIFT | - GEN7_WM_DW1_AA_LINE_WIDTH_2_0; - - /* same value as in 3DSTATE_SF */ - if (state->line_smooth) - dw |= GEN7_WM_DW1_AA_LINE_CAP_1_0; - - if (state->poly_stipple_enable) - dw |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE; - if (state->line_stipple_enable) - dw |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE; - - if (state->bottom_edge_rule) - dw |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; - - return dw; -} - -void -ilo_gpe_init_rasterizer(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_state *rasterizer) -{ - rasterizer_init_clip(dev, state, &rasterizer->clip); - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - memset(&rasterizer->wm, 0, sizeof(rasterizer->wm)); - rasterizer->wm.payload[0] = rasterizer_get_wm_gen8(dev, state); - - rasterizer_init_sf_gen8(dev, state, &rasterizer->sf); - } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - rasterizer_init_wm_gen7(dev, state, &rasterizer->wm); - rasterizer_init_sf_gen6(dev, state, &rasterizer->sf); - } else { - rasterizer_init_wm_gen6(dev, state, &rasterizer->wm); - rasterizer_init_sf_gen6(dev, state, &rasterizer->sf); - } -} - static void fs_init_cso_gen6(const struct ilo_dev *dev, const struct ilo_shader_state *fs, struct ilo_shader_cso *cso) { - int start_grf, input_count, sampler_count, interps, max_threads; + int start_grf, input_count, sampler_count, max_threads; uint32_t dw2, dw4, dw5, dw6; ILO_DEV_ASSERT(dev, 6, 6); @@ -617,8 +48,6 @@ fs_init_cso_gen6(const struct ilo_dev *dev, start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); input_count = 
ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); - interps = ilo_shader_get_kernel_param(fs, - ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS); /* see brwCreateContext() */ max_threads = (dev->gt == 2) ? 80 : 40; @@ -691,8 +120,7 @@ fs_init_cso_gen6(const struct ilo_dev *dev, dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT; dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | - GEN6_POSOFFSET_NONE << GEN6_WM_DW6_PS_POSOFFSET__SHIFT | - interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT; + GEN6_POSOFFSET_NONE << GEN6_WM_DW6_PS_POSOFFSET__SHIFT; STATIC_ASSERT(Elements(cso->payload) >= 4); cso->payload[0] = dw2; @@ -709,9 +137,7 @@ fs_get_wm_gen7(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 7, 7.5); - dw = ilo_shader_get_kernel_param(fs, - ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) << - GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT; + dw = 0; /* * TODO set this bit only when @@ -839,17 +265,6 @@ fs_get_psx_gen8(const struct ilo_dev *dev, return dw; } -static uint32_t -fs_get_wm_gen8(const struct ilo_dev *dev, - const struct ilo_shader_state *fs) -{ - ILO_DEV_ASSERT(dev, 8, 8); - - return ilo_shader_get_kernel_param(fs, - ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) << - GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT; -} - static void fs_init_cso_gen8(const struct ilo_dev *dev, const struct ilo_shader_state *fs, @@ -879,12 +294,11 @@ fs_init_cso_gen8(const struct ilo_dev *dev, 0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT | 0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT; - STATIC_ASSERT(Elements(cso->payload) >= 5); + STATIC_ASSERT(Elements(cso->payload) >= 4); cso->payload[0] = dw3; cso->payload[1] = dw6; cso->payload[2] = dw7; cso->payload[3] = fs_get_psx_gen8(dev, fs); - cso->payload[4] = fs_get_wm_gen8(dev, fs); } void @@ -1589,6 +1003,11 @@ fb_set_blend_caps(const struct ilo_dev *dev, if (format == PIPE_FORMAT_NONE || desc->is_mixed) return; + caps->is_unorm = (ch >= 0 && desc->channel[ch].normalized && 
+ desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED && + desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB); + caps->is_integer = util_format_is_pure_integer(format); + /* * From the Sandy Bridge PRM, volume 2 part 1, page 365: * @@ -1597,16 +1016,10 @@ fb_set_blend_caps(const struct ilo_dev *dev, * * According to the classic driver, this is lifted on Gen8+. */ - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - caps->can_logicop = true; - } else { - caps->can_logicop = (ch >= 0 && desc->channel[ch].normalized && - desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED && - desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB); - } + caps->can_logicop = (ilo_dev_gen(dev) >= ILO_GEN(8) || caps->is_unorm); /* no blending for pure integer formats */ - caps->can_blend = !util_format_is_pure_integer(format); + caps->can_blend = !caps->is_integer; /* * From the Sandy Bridge PRM, volume 2 part 1, page 382: @@ -1614,7 +1027,7 @@ fb_set_blend_caps(const struct ilo_dev *dev, * "Alpha Test can only be enabled if Pixel Shader outputs a float * alpha value." 
*/ - caps->can_alpha_test = !util_format_is_pure_integer(format); + caps->can_alpha_test = !caps->is_integer; caps->dst_alpha_forced_one = (ilo_format_translate_render(dev, format) != @@ -1650,10 +1063,13 @@ ilo_gpe_set_fb(const struct ilo_dev *dev, util_copy_framebuffer_state(&fb->state, state); + fb->has_integer_rt = false; for (i = 0; i < state->nr_cbufs; i++) { if (state->cbufs[i]) { fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]); + fb->has_integer_rt |= fb->blend_caps[i].is_integer; + if (!first_surf) first_surf = state->cbufs[i]; } else { @@ -1668,6 +1084,18 @@ ilo_gpe_set_fb(const struct ilo_dev *dev, if (!fb->num_samples) fb->num_samples = 1; + if (state->zsbuf) { + const struct ilo_surface_cso *cso = + (const struct ilo_surface_cso *) state->zsbuf; + + fb->has_hiz = cso->u.zs.hiz_bo; + fb->depth_offset_format = + ilo_state_zs_get_depth_format(&cso->u.zs, dev); + } else { + fb->has_hiz = false; + fb->depth_offset_format = GEN6_ZFORMAT_D32_FLOAT; + } + /* * The PRMs list several restrictions when the framebuffer has more than * one surface. It seems they are actually lifted on GEN6+. diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index 1967c485ca5..a092aff1993 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -39,12 +39,6 @@ enum ilo_blitter_uses { ILO_BLITTER_USE_FB_STENCIL = 1 << 4, }; -enum ilo_blitter_rectlist_op { - ILO_BLITTER_RECTLIST_CLEAR_ZS, - ILO_BLITTER_RECTLIST_RESOLVE_Z, - ILO_BLITTER_RECTLIST_RESOLVE_HIZ, -}; - struct blitter_context; struct pipe_resource; struct pipe_surface; @@ -57,7 +51,8 @@ struct ilo_blitter { /* * A minimal context with the goal to send RECTLISTs down the pipeline. 
*/ - enum ilo_blitter_rectlist_op op; + enum ilo_state_raster_earlyz_op earlyz_op; + bool earlyz_stencil_clear; uint32_t uses; bool initialized; @@ -83,6 +78,8 @@ struct ilo_blitter { struct ilo_surface_cso dst; unsigned width, height; unsigned num_samples; + + struct ilo_state_raster rs; } fb; }; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 84100c0f3c7..51e640d5236 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -82,10 +82,12 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter) } static void -ilo_blitter_set_op(struct ilo_blitter *blitter, - enum ilo_blitter_rectlist_op op) +ilo_blitter_set_earlyz_op(struct ilo_blitter *blitter, + enum ilo_state_raster_earlyz_op op, + bool earlyz_stencil_clear) { - blitter->op = op; + blitter->earlyz_op = op; + blitter->earlyz_stencil_clear = earlyz_stencil_clear; } /** @@ -127,6 +129,15 @@ ilo_blitter_set_dsa(struct ilo_blitter *blitter, ilo_gpe_init_dsa(blitter->ilo->dev, state, &blitter->dsa); } +static void +ilo_blitter_set_fb_rs(struct ilo_blitter *blitter) +{ + memset(&blitter->fb.rs, 0, sizeof(blitter->fb.rs)); + ilo_state_raster_init_for_rectlist(&blitter->fb.rs, blitter->ilo->dev, + blitter->fb.num_samples, blitter->earlyz_op, + blitter->earlyz_stencil_clear); +} + static void ilo_blitter_set_fb(struct ilo_blitter *blitter, struct pipe_resource *res, unsigned level, @@ -142,6 +153,8 @@ ilo_blitter_set_fb(struct ilo_blitter *blitter, blitter->fb.num_samples = 1; memcpy(&blitter->fb.dst, cso, sizeof(*cso)); + + ilo_blitter_set_fb_rs(blitter); } static void @@ -187,9 +200,9 @@ hiz_align_fb(struct ilo_blitter *blitter) { unsigned align_w, align_h; - switch (blitter->op) { - case ILO_BLITTER_RECTLIST_CLEAR_ZS: - case ILO_BLITTER_RECTLIST_RESOLVE_Z: + switch (blitter->earlyz_op) { + case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR: + case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE: break; default: 
return; @@ -393,7 +406,9 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter, } ilo_blitter_set_invariants(blitter); - ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_CLEAR_ZS); + ilo_blitter_set_earlyz_op(blitter, + ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR, + clear_flags & PIPE_CLEAR_STENCIL); ilo_blitter_set_dsa(blitter, &dsa_state); ilo_blitter_set_clear_values(blitter, clear_value, (ubyte) stencil); @@ -437,7 +452,8 @@ ilo_blitter_rectlist_resolve_z(struct ilo_blitter *blitter, dsa_state.depth.func = PIPE_FUNC_NEVER; ilo_blitter_set_invariants(blitter); - ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_RESOLVE_Z); + ilo_blitter_set_earlyz_op(blitter, + ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE, false); ilo_blitter_set_dsa(blitter, &dsa_state); ilo_blitter_set_clear_values(blitter, s->clear_value, 0); @@ -470,7 +486,8 @@ ilo_blitter_rectlist_resolve_hiz(struct ilo_blitter *blitter, dsa_state.depth.writemask = true; ilo_blitter_set_invariants(blitter); - ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_RESOLVE_HIZ); + ilo_blitter_set_earlyz_op(blitter, + ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE, false); ilo_blitter_set_dsa(blitter, &dsa_state); ilo_blitter_set_fb_from_resource(blitter, res, res->format, level, slice); diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c index 92898704cde..f71059857c6 100644 --- a/src/gallium/drivers/ilo/ilo_render.c +++ b/src/gallium/drivers/ilo/ilo_render.c @@ -448,6 +448,9 @@ draw_session_prepare(struct ilo_render *render, session->prim_changed = true; session->primitive_restart_changed = true; + ilo_state_raster_full_delta(&vec->rasterizer->rs, render->dev, + &session->rs_delta); + ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev, &session->vp_delta); } else { @@ -456,6 +459,11 @@ draw_session_prepare(struct ilo_render *render, session->primitive_restart_changed = (render->state.primitive_restart != vec->draw->primitive_restart); + if (vec->dirty & ILO_DIRTY_RASTERIZER) { + 
ilo_state_raster_get_delta(&vec->rasterizer->rs, render->dev, + &render->state.rs, &session->rs_delta); + } + if (vec->dirty & ILO_DIRTY_VIEWPORT) { ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev, &session->vp_delta); @@ -476,6 +484,8 @@ draw_session_end(struct ilo_render *render, render->state.reduced_prim = session->reduced_prim; render->state.primitive_restart = vec->draw->primitive_restart; + + render->state.rs = vec->rasterizer->rs; } void diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index 5de41623214..2bf51724733 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -31,6 +31,7 @@ #include "core/ilo_builder.h" #include "core/ilo_builder_3d.h" #include "core/ilo_builder_render.h" +#include "core/ilo_state_raster.h" #include "ilo_common.h" #include "ilo_state.h" @@ -89,6 +90,8 @@ struct ilo_render { int reduced_prim; int so_max_vertices; + struct ilo_state_raster rs; + uint32_t SF_VIEWPORT; uint32_t CLIP_VIEWPORT; uint32_t SF_CLIP_VIEWPORT; /* GEN7+ */ @@ -144,6 +147,7 @@ struct ilo_render_draw_session { bool prim_changed; bool primitive_restart_changed; + struct ilo_state_raster_delta rs_delta; struct ilo_state_viewport_delta vp_delta; /* dynamic states */ diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 31198723367..e292ae8f3f9 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -633,37 +633,8 @@ gen6_draw_clip(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_CLIP */ - if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(VIEWPORT) || DIRTY(FB)) { - bool enable_guardband = true; - unsigned i; - - /* - * Gen8+ has viewport extent test. Guard band test can be enabled on - * prior Gens only when the viewport is larger than the framebuffer, - * unless we emulate viewport extent test on them. 
- */ - if (ilo_dev_gen(r->dev) < ILO_GEN(8)) { - for (i = 0; i < vec->viewport.params.count; i++) { - const struct ilo_state_viewport_matrix_info *mat = - &vec->viewport.matrices[i]; - float min_x, max_x, min_y, max_y; - - min_x = -1.0f * fabsf(mat->scale[0]) + mat->translate[0]; - max_x = 1.0f * fabsf(mat->scale[0]) + mat->translate[0]; - min_y = -1.0f * fabsf(mat->scale[1]) + mat->translate[1]; - max_y = 1.0f * fabsf(mat->scale[1]) + mat->translate[1]; - - if (min_x > 0.0f || max_x < vec->fb.state.width || - min_y > 0.0f || max_y < vec->fb.state.height) { - enable_guardband = false; - break; - } - } - } - - gen6_3DSTATE_CLIP(r->builder, vec->rasterizer, - vec->fs, enable_guardband, 1); - } + if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_CLIP) + gen6_3DSTATE_CLIP(r->builder, &vec->rasterizer->rs); } static void @@ -672,9 +643,10 @@ gen6_draw_sf(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_SF */ - if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(FB)) { - gen6_3DSTATE_SF(r->builder, vec->rasterizer, vec->fs, - vec->fb.num_samples); + if ((session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF) || + DIRTY(RASTERIZER) || DIRTY(FS)) { + gen6_3DSTATE_SF(r->builder, &vec->rasterizer->rs, + vec->rasterizer->state.sprite_coord_mode, vec->fs); } } @@ -708,7 +680,8 @@ gen6_draw_wm(struct ilo_render *r, /* 3DSTATE_WM */ if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || - DIRTY(RASTERIZER) || r->instruction_bo_changed) { + (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) || + r->instruction_bo_changed) { const bool dual_blend = vec->blend->dual_blend; const bool cc_may_kill = (vec->dsa->dw_blend_alpha || vec->blend->alpha_to_coverage); @@ -716,8 +689,8 @@ gen6_draw_wm(struct ilo_render *r, if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed) gen6_wa_pre_3dstate_wm_max_threads(r); - gen6_3DSTATE_WM(r->builder, vec->fs, - vec->rasterizer, dual_blend, cc_may_kill); + gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs, vec->fs, + 
dual_blend, cc_may_kill); } } @@ -726,8 +699,9 @@ gen6_draw_wm_multisample(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { - /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */ - if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) { + /* 3DSTATE_MULTISAMPLE */ + if (DIRTY(FB) || (session->rs_delta.dirty & + ILO_STATE_RASTER_3DSTATE_MULTISAMPLE)) { const uint32_t *pattern; pattern = (vec->fb.num_samples > 1) ? @@ -739,12 +713,12 @@ gen6_draw_wm_multisample(struct ilo_render *r, } gen6_3DSTATE_MULTISAMPLE(r->builder, - vec->fb.num_samples, pattern, - vec->rasterizer->state.half_pixel_center); - - gen6_3DSTATE_SAMPLE_MASK(r->builder, - (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1); + &vec->rasterizer->rs, pattern, 1); } + + /* 3DSTATE_SAMPLE_MASK */ + if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK) + gen6_3DSTATE_SAMPLE_MASK(r->builder, &vec->rasterizer->rs); } static void @@ -872,35 +846,18 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r, gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); gen6_disable_3DSTATE_GS(r->builder); - gen6_disable_3DSTATE_CLIP(r->builder); - gen6_3DSTATE_SF(r->builder, NULL, NULL, blitter->fb.num_samples); + gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs); + gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, 0, NULL); } static void gen6_rectlist_wm(struct ilo_render *r, const struct ilo_blitter *blitter) { - uint32_t hiz_op; - - switch (blitter->op) { - case ILO_BLITTER_RECTLIST_CLEAR_ZS: - hiz_op = GEN6_WM_DW4_DEPTH_CLEAR; - break; - case ILO_BLITTER_RECTLIST_RESOLVE_Z: - hiz_op = GEN6_WM_DW4_DEPTH_RESOLVE; - break; - case ILO_BLITTER_RECTLIST_RESOLVE_HIZ: - hiz_op = GEN6_WM_DW4_HIZ_RESOLVE; - break; - default: - hiz_op = 0; - break; - } - gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen6_wa_pre_3dstate_wm_max_threads(r); - gen6_hiz_3DSTATE_WM(r->builder, hiz_op); + gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, NULL, false, false); } static void @@ -936,11 +893,8 @@ 
gen6_rectlist_wm_multisample(struct ilo_render *r, gen6_wa_pre_3dstate_multisample(r); - gen6_3DSTATE_MULTISAMPLE(r->builder, blitter->fb.num_samples, - pattern, true); - - gen6_3DSTATE_SAMPLE_MASK(r->builder, - (1 << blitter->fb.num_samples) - 1); + gen6_3DSTATE_MULTISAMPLE(r->builder, &blitter->fb.rs, pattern, true); + gen6_3DSTATE_SAMPLE_MASK(r->builder, &blitter->fb.rs); } int diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 88331bf2380..b427b2920f8 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -483,16 +483,11 @@ gen7_draw_sf(struct ilo_render *r, } /* 3DSTATE_SF */ - if (DIRTY(RASTERIZER) || DIRTY(FB)) { - struct pipe_surface *zs = vec->fb.state.zsbuf; - + if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF) { if (ilo_dev_gen(r->dev) == ILO_GEN(7)) gen7_wa_pre_3dstate_sf_depth_bias(r); - gen7_3DSTATE_SF(r->builder, - (vec->rasterizer) ? &vec->rasterizer->sf : NULL, - (zs) ? 
zs->format : PIPE_FORMAT_NONE, - vec->fb.num_samples); + gen7_3DSTATE_SF(r->builder, &vec->rasterizer->rs); } } @@ -502,11 +497,12 @@ gen7_draw_wm(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_WM */ - if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER)) { + if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || + (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM)) { const bool cc_may_kill = (vec->dsa->dw_blend_alpha || vec->blend->alpha_to_coverage); - gen7_3DSTATE_WM(r->builder, vec->fs, vec->rasterizer, cc_may_kill); + gen7_3DSTATE_WM(r->builder, &vec->rasterizer->rs, vec->fs, cc_may_kill); } /* 3DSTATE_BINDING_TABLE_POINTERS_PS */ @@ -600,24 +596,30 @@ gen7_draw_wm_multisample(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { - /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */ - if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) { + /* 3DSTATE_MULTISAMPLE */ + if (DIRTY(FB) || (session->rs_delta.dirty & + ILO_STATE_RASTER_3DSTATE_MULTISAMPLE)) { const uint32_t *pattern; + int pattern_len; gen7_wa_pre_3dstate_multisample(r); - pattern = (vec->fb.num_samples > 4) ? r->sample_pattern_8x : - (vec->fb.num_samples > 1) ? &r->sample_pattern_4x : - &r->sample_pattern_1x; + if (vec->fb.num_samples > 4) { + pattern = r->sample_pattern_8x; + pattern_len = ARRAY_SIZE(r->sample_pattern_8x); + } else { + pattern = (vec->fb.num_samples > 1) ? + &r->sample_pattern_4x : &r->sample_pattern_1x; + pattern_len = 1; + } - gen6_3DSTATE_MULTISAMPLE(r->builder, - vec->fb.num_samples, pattern, - vec->rasterizer->state.half_pixel_center); - - gen7_3DSTATE_SAMPLE_MASK(r->builder, - (vec->fb.num_samples > 1) ? 
vec->sample_mask : 0x1, - vec->fb.num_samples); + gen6_3DSTATE_MULTISAMPLE(r->builder, &vec->rasterizer->rs, + pattern, pattern_len); } + + /* 3DSTATE_SAMPLE_MASK */ + if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK) + gen6_3DSTATE_SAMPLE_MASK(r->builder, &vec->rasterizer->rs); } void @@ -720,13 +722,12 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r, gen7_3DSTATE_STREAMOUT(r->builder, 0, false, 0x0, 0); - gen6_disable_3DSTATE_CLIP(r->builder); + gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs); if (ilo_dev_gen(r->dev) == ILO_GEN(7)) gen7_wa_pre_3dstate_sf_depth_bias(r); - gen7_3DSTATE_SF(r->builder, NULL, blitter->fb.dst.base.format, - blitter->fb.num_samples); + gen7_3DSTATE_SF(r->builder, &blitter->fb.rs); gen7_3DSTATE_SBE(r->builder, NULL, 0); } @@ -734,24 +735,7 @@ static void gen7_rectlist_wm(struct ilo_render *r, const struct ilo_blitter *blitter) { - uint32_t hiz_op; - - switch (blitter->op) { - case ILO_BLITTER_RECTLIST_CLEAR_ZS: - hiz_op = GEN7_WM_DW1_DEPTH_CLEAR; - break; - case ILO_BLITTER_RECTLIST_RESOLVE_Z: - hiz_op = GEN7_WM_DW1_DEPTH_RESOLVE; - break; - case ILO_BLITTER_RECTLIST_RESOLVE_HIZ: - hiz_op = GEN7_WM_DW1_HIZ_RESOLVE; - break; - default: - hiz_op = 0; - break; - } - - gen7_hiz_3DSTATE_WM(r->builder, hiz_op); + gen7_3DSTATE_WM(r->builder, &blitter->fb.rs, NULL, false); gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); @@ -787,18 +771,24 @@ static void gen7_rectlist_wm_multisample(struct ilo_render *r, const struct ilo_blitter *blitter) { - const uint32_t *pattern = - (blitter->fb.num_samples > 4) ? r->sample_pattern_8x : - (blitter->fb.num_samples > 1) ? &r->sample_pattern_4x : - &r->sample_pattern_1x; + const uint32_t *pattern; + int pattern_len; + + if (blitter->fb.num_samples > 4) { + pattern = r->sample_pattern_8x; + pattern_len = ARRAY_SIZE(r->sample_pattern_8x); + } else { + pattern = (blitter->fb.num_samples > 1) ? 
+ &r->sample_pattern_4x : &r->sample_pattern_1x; + pattern_len = 1; + } gen7_wa_pre_3dstate_multisample(r); - gen6_3DSTATE_MULTISAMPLE(r->builder, blitter->fb.num_samples, - pattern, true); + gen6_3DSTATE_MULTISAMPLE(r->builder, &blitter->fb.rs, + pattern, pattern_len); - gen7_3DSTATE_SAMPLE_MASK(r->builder, - (1 << blitter->fb.num_samples) - 1, blitter->fb.num_samples); + gen6_3DSTATE_SAMPLE_MASK(r->builder, &blitter->fb.rs); } void diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 994d38b0d05..7afb35e8b6b 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -66,10 +66,8 @@ gen8_draw_sf(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_RASTER */ - if (DIRTY(RASTERIZER)) { - gen8_3DSTATE_RASTER(r->builder, (vec->rasterizer) ? - &vec->rasterizer->sf : NULL); - } + if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_RASTER) + gen8_3DSTATE_RASTER(r->builder, &vec->rasterizer->rs); /* 3DSTATE_SBE */ if (DIRTY(RASTERIZER) || DIRTY(FS)) { @@ -82,10 +80,8 @@ gen8_draw_sf(struct ilo_render *r, gen8_3DSTATE_SBE_SWIZ(r->builder, vec->fs); /* 3DSTATE_SF */ - if (DIRTY(RASTERIZER)) { - gen8_3DSTATE_SF(r->builder, (vec->rasterizer) ? 
- &vec->rasterizer->sf : NULL); - } + if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF) + gen7_3DSTATE_SF(r->builder, &vec->rasterizer->rs); } static void @@ -94,8 +90,8 @@ gen8_draw_wm(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_WM */ - if (DIRTY(FS) || DIRTY(RASTERIZER)) - gen8_3DSTATE_WM(r->builder, vec->fs, vec->rasterizer); + if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) + gen8_3DSTATE_WM(r->builder, &vec->rasterizer->rs); if (DIRTY(DSA)) gen8_3DSTATE_WM_DEPTH_STENCIL(r->builder, vec->dsa); @@ -198,15 +194,13 @@ gen8_draw_wm_multisample(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { - /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */ - if (DIRTY(SAMPLE_MASK) || DIRTY(FB) || DIRTY(RASTERIZER)) { - gen8_3DSTATE_MULTISAMPLE(r->builder, vec->fb.num_samples, - vec->rasterizer->state.half_pixel_center); + /* 3DSTATE_MULTISAMPLE */ + if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_MULTISAMPLE) + gen8_3DSTATE_MULTISAMPLE(r->builder, &vec->rasterizer->rs); - gen7_3DSTATE_SAMPLE_MASK(r->builder, - (vec->fb.num_samples > 1) ? 
vec->sample_mask : 0x1, - vec->fb.num_samples); - } + /* 3DSTATE_SAMPLE_MASK */ + if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK) + gen6_3DSTATE_SAMPLE_MASK(r->builder, &vec->rasterizer->rs); } static void @@ -365,8 +359,6 @@ ilo_render_emit_rectlist_commands_gen8(struct ilo_render *r, const struct ilo_blitter *blitter, const struct ilo_render_rectlist_session *session) { - uint32_t op; - ILO_DEV_ASSERT(r->dev, 8, 8); gen8_wa_pre_depth(r); @@ -391,27 +383,8 @@ ilo_render_emit_rectlist_commands_gen8(struct ilo_render *r, gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0, blitter->fb.width, blitter->fb.height); - switch (blitter->op) { - case ILO_BLITTER_RECTLIST_CLEAR_ZS: - op = 0; - if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) - op |= GEN8_WM_HZ_DW1_DEPTH_CLEAR; - if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL) - op |= GEN8_WM_HZ_DW1_STENCIL_CLEAR; - break; - case ILO_BLITTER_RECTLIST_RESOLVE_Z: - op = GEN8_WM_HZ_DW1_DEPTH_RESOLVE; - break; - case ILO_BLITTER_RECTLIST_RESOLVE_HIZ: - op = GEN8_WM_HZ_DW1_HIZ_RESOLVE; - break; - default: - op = 0; - break; - } - - gen8_3DSTATE_WM_HZ_OP(r->builder, op, blitter->fb.width, - blitter->fb.height, blitter->fb.num_samples); + gen8_3DSTATE_WM_HZ_OP(r->builder, &blitter->fb.rs, + blitter->fb.width, blitter->fb.height); ilo_render_pipe_control(r, GEN6_PIPE_CONTROL_WRITE_IMM); diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index d4d12ca8431..048152158eb 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -25,6 +25,7 @@ * Chia-I Wu */ +#include "core/ilo_builder_3d.h" /* for gen6_3d_translate_pipe_prim() */ #include "core/ilo_format.h" #include "core/ilo_state_3d.h" #include "util/u_dynarray.h" @@ -120,6 +121,45 @@ ilo_translate_shadow_func(unsigned func) } } +static enum gen_front_winding +ilo_translate_front_ccw(unsigned front_ccw) +{ + return (front_ccw) ? 
GEN6_FRONTWINDING_CCW : GEN6_FRONTWINDING_CW; +} + +static enum gen_cull_mode +ilo_translate_cull_face(unsigned cull_face) +{ + switch (cull_face) { + case PIPE_FACE_NONE: return GEN6_CULLMODE_NONE; + case PIPE_FACE_FRONT: return GEN6_CULLMODE_FRONT; + case PIPE_FACE_BACK: return GEN6_CULLMODE_BACK; + case PIPE_FACE_FRONT_AND_BACK: return GEN6_CULLMODE_BOTH; + default: + assert(!"unknown face culling"); + return GEN6_CULLMODE_NONE; + } +} + +static enum gen_fill_mode +ilo_translate_poly_mode(unsigned poly_mode) +{ + switch (poly_mode) { + case PIPE_POLYGON_MODE_FILL: return GEN6_FILLMODE_SOLID; + case PIPE_POLYGON_MODE_LINE: return GEN6_FILLMODE_WIREFRAME; + case PIPE_POLYGON_MODE_POINT: return GEN6_FILLMODE_POINT; + default: + assert(!"unknown polygon mode"); + return GEN6_FILLMODE_SOLID; + } +} + +static enum gen_pixel_location +ilo_translate_half_pixel_center(bool half_pixel_center) +{ + return (half_pixel_center) ? GEN6_PIXLOC_CENTER : GEN6_PIXLOC_UL_CORNER; +} + static void finalize_shader_states(struct ilo_state_vector *vec) { @@ -346,6 +386,86 @@ finalize_viewport(struct ilo_context *ilo) } } +static bool +can_enable_gb_test(const struct ilo_rasterizer_state *rasterizer, + const struct ilo_viewport_state *viewport, + const struct ilo_fb_state *fb) +{ + unsigned i; + + /* + * There are several reasons that guard band test should be disabled + * + * - GL wide points (to avoid partially visible object) + * - GL wide or AA lines (to avoid partially visible object) + * - missing 2D clipping + */ + if (rasterizer->state.point_size_per_vertex || + rasterizer->state.point_size > 1.0f || + rasterizer->state.line_width > 1.0f || + rasterizer->state.line_smooth) + return false; + + for (i = 0; i < viewport->params.count; i++) { + const struct ilo_state_viewport_matrix_info *mat = + &viewport->matrices[i]; + float min_x, max_x, min_y, max_y; + + min_x = -1.0f * fabsf(mat->scale[0]) + mat->translate[0]; + max_x = 1.0f * fabsf(mat->scale[0]) + mat->translate[0]; + min_y = 
-1.0f * fabsf(mat->scale[1]) + mat->translate[1]; + max_y = 1.0f * fabsf(mat->scale[1]) + mat->translate[1]; + + if (min_x > 0.0f || max_x < fb->state.width || + min_y > 0.0f || max_y < fb->state.height) + return false; + } + + return true; +} + +static void +finalize_rasterizer(struct ilo_context *ilo) +{ + const struct ilo_dev *dev = ilo->dev; + struct ilo_state_vector *vec = &ilo->state_vector; + struct ilo_rasterizer_state *rasterizer = vec->rasterizer; + struct ilo_state_raster_info *info = &vec->rasterizer->info; + const bool gb_test_enable = + can_enable_gb_test(rasterizer, &vec->viewport, &vec->fb); + const bool multisample = + (rasterizer->state.multisample && vec->fb.num_samples > 1); + const uint8_t barycentric_interps = ilo_shader_get_kernel_param(vec->fs, + ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS); + + /* check for non-orthogonal states */ + if (info->clip.viewport_count != vec->viewport.params.count || + info->clip.gb_test_enable != gb_test_enable || + info->setup.msaa_enable != multisample || + info->setup.line_msaa_enable != multisample || + info->tri.depth_offset_format != vec->fb.depth_offset_format || + info->scan.sample_count != vec->fb.num_samples || + info->scan.sample_mask != vec->sample_mask || + info->scan.barycentric_interps != barycentric_interps || + info->params.any_integer_rt != vec->fb.has_integer_rt || + info->params.hiz_enable != vec->fb.has_hiz) { + info->clip.viewport_count = vec->viewport.params.count; + info->clip.gb_test_enable = gb_test_enable; + info->setup.msaa_enable = multisample; + info->setup.line_msaa_enable = multisample; + info->tri.depth_offset_format = vec->fb.depth_offset_format; + info->scan.sample_count = vec->fb.num_samples; + info->scan.sample_mask = vec->sample_mask; + info->scan.barycentric_interps = barycentric_interps; + info->params.any_integer_rt = vec->fb.has_integer_rt; + info->params.hiz_enable = vec->fb.has_hiz; + + ilo_state_raster_set_info(&rasterizer->rs, dev, &rasterizer->info); + + vec->dirty |= 
ILO_DIRTY_RASTERIZER; + } +} + /** * Finalize states. Some states depend on other states and are * incomplete/invalid until finalized. @@ -361,6 +481,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo, finalize_index_buffer(ilo); finalize_vertex_elements(ilo); + finalize_rasterizer(ilo); finalize_viewport(ilo); u_upload_unmap(ilo->uploader); @@ -601,12 +722,74 @@ ilo_create_rasterizer_state(struct pipe_context *pipe, { const struct ilo_dev *dev = ilo_context(pipe)->dev; struct ilo_rasterizer_state *rast; + struct ilo_state_raster_info *info; - rast = MALLOC_STRUCT(ilo_rasterizer_state); + rast = CALLOC_STRUCT(ilo_rasterizer_state); assert(rast); rast->state = *state; - ilo_gpe_init_rasterizer(dev, state, rast); + + info = &rast->info; + + info->clip.clip_enable = true; + info->clip.stats_enable = true; + info->clip.viewport_count = 1; + info->clip.force_rtaindex_zero = true; + info->clip.user_clip_enables = state->clip_plane_enable; + info->clip.gb_test_enable = true; + info->clip.xy_test_enable = true; + info->clip.z_far_enable = state->depth_clip; + info->clip.z_near_enable = state->depth_clip; + info->clip.z_near_zero = state->clip_halfz; + + info->setup.first_vertex_provoking = state->flatshade_first; + info->setup.viewport_transform = true; + info->setup.scissor_enable = state->scissor; + info->setup.msaa_enable = false; + info->setup.line_msaa_enable = false; + info->point.aa_enable = state->point_smooth; + info->point.programmable_width = state->point_size_per_vertex; + info->line.aa_enable = state->line_smooth; + info->line.stipple_enable = state->line_stipple_enable; + info->line.giq_enable = true; + info->line.giq_last_pixel = state->line_last_pixel; + info->tri.front_winding = ilo_translate_front_ccw(state->front_ccw); + info->tri.cull_mode = ilo_translate_cull_face(state->cull_face); + info->tri.fill_mode_front = ilo_translate_poly_mode(state->fill_front); + info->tri.fill_mode_back = ilo_translate_poly_mode(state->fill_back); + 
info->tri.depth_offset_format = GEN6_ZFORMAT_D24_UNORM_X8_UINT; + info->tri.depth_offset_solid = state->offset_tri; + info->tri.depth_offset_wireframe = state->offset_line; + info->tri.depth_offset_point = state->offset_point; + info->tri.poly_stipple_enable = state->poly_stipple_enable; + + info->scan.stats_enable = true; + info->scan.sample_count = 1; + info->scan.pixloc = + ilo_translate_half_pixel_center(state->half_pixel_center); + info->scan.sample_mask = ~0u; + info->scan.zw_interp = GEN6_ZW_INTERP_PIXEL; + info->scan.barycentric_interps = GEN6_INTERP_PERSPECTIVE_PIXEL; + info->scan.earlyz_control = GEN7_EDSC_NORMAL; + info->scan.earlyz_op = ILO_STATE_RASTER_EARLYZ_NORMAL; + info->scan.earlyz_stencil_clear = false; + + info->params.any_integer_rt = false; + info->params.hiz_enable = true; + info->params.point_width = + (state->point_size == 0.0f) ? 1.0f : state->point_size; + info->params.line_width = + (state->line_width == 0.0f) ? 1.0f : state->line_width; + + info->params.depth_offset_scale = state->offset_scale; + /* + * Scale the constant term. The minimum representable value used by the HW + * is not large enough to be the minimum resolvable difference. 
+ */ + info->params.depth_offset_const = state->offset_units * 2.0f; + info->params.depth_offset_clamp = state->offset_clamp; + + ilo_state_raster_init(&rast->rs, dev, info); return rast; } @@ -1566,6 +1749,8 @@ void ilo_state_vector_init(const struct ilo_dev *dev, struct ilo_state_vector *vec) { + vec->sample_mask = ~0u; + ilo_state_viewport_init_data_only(&vec->viewport.vp, dev, vec->viewport.vp_data, sizeof(vec->viewport.vp_data)); assert(vec->viewport.vp.array_size >= ILO_MAX_VIEWPORTS); diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 5541c40ba18..8f6cce2b53a 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -29,6 +29,7 @@ #define ILO_STATE_H #include "core/ilo_state_3d.h" +#include "core/ilo_state_raster.h" #include "core/ilo_state_sampler.h" #include "core/ilo_state_surface.h" #include "core/ilo_state_viewport.h" @@ -170,6 +171,14 @@ struct ilo_view_state { unsigned count; }; +struct ilo_rasterizer_state { + struct pipe_rasterizer_state state; + + /* these are invalid until finalize_rasterizer() */ + struct ilo_state_raster_info info; + struct ilo_state_raster rs; +}; + struct ilo_viewport_state { struct ilo_state_viewport_matrix_info matrices[ILO_MAX_VIEWPORTS]; struct ilo_state_viewport_scissor_info scissors[ILO_MAX_VIEWPORTS]; @@ -224,7 +233,8 @@ struct ilo_state_vector { struct ilo_viewport_state viewport; - const struct ilo_rasterizer_state *rasterizer; + struct ilo_rasterizer_state *rasterizer; + struct pipe_poly_stipple poly_stipple; unsigned sample_mask; From 960ca7d5e32997a5367cf798f7930cbb890b3ab4 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 11 May 2015 19:48:52 +0800 Subject: [PATCH 625/834] ilo: embed ilo_state_cc in ilo_blend_state --- .../drivers/ilo/core/ilo_builder_3d_bottom.h | 199 +---- src/gallium/drivers/ilo/core/ilo_state_3d.h | 44 +- .../drivers/ilo/core/ilo_state_3d_bottom.c | 680 +----------------- src/gallium/drivers/ilo/ilo_blitter.h | 8 
+- .../drivers/ilo/ilo_blitter_rectlist.c | 70 +- src/gallium/drivers/ilo/ilo_render.c | 9 + src/gallium/drivers/ilo/ilo_render_dynamic.c | 33 +- src/gallium/drivers/ilo/ilo_render_gen.h | 2 + src/gallium/drivers/ilo/ilo_render_gen6.c | 9 +- src/gallium/drivers/ilo/ilo_render_gen7.c | 9 +- src/gallium/drivers/ilo/ilo_render_gen8.c | 16 +- src/gallium/drivers/ilo/ilo_state.c | 328 ++++++++- src/gallium/drivers/ilo/ilo_state.h | 34 +- 13 files changed, 462 insertions(+), 979 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 2d7b9e0035f..cd1a6821ca6 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -35,6 +35,7 @@ #include "ilo_core.h" #include "ilo_dev.h" #include "ilo_format.h" +#include "ilo_state_cc.h" #include "ilo_state_raster.h" #include "ilo_state_viewport.h" #include "ilo_builder.h" @@ -406,21 +407,19 @@ gen8_3DSTATE_WM(struct ilo_builder *builder, static inline void gen8_3DSTATE_WM_DEPTH_STENCIL(struct ilo_builder *builder, - const struct ilo_dsa_state *dsa) + const struct ilo_state_cc *cc) { const uint8_t cmd_len = 3; - uint32_t dw1, dw2, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); - dw1 = dsa->payload[0]; - dw2 = dsa->payload[1]; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_DEPTH_STENCIL) | (cmd_len - 2); - dw[1] = dw1; - dw[2] = dw2; + /* see cc_set_gen8_3DSTATE_WM_DEPTH_STENCIL() */ + dw[1] = cc->ds[0]; + dw[2] = cc->ds[1]; } static inline void @@ -605,40 +604,18 @@ gen8_3DSTATE_PS_EXTRA(struct ilo_builder *builder, static inline void gen8_3DSTATE_PS_BLEND(struct ilo_builder *builder, - const struct ilo_blend_state *blend, - const struct ilo_fb_state *fb, - const struct ilo_dsa_state *dsa) + const struct ilo_state_cc *cc) { const uint8_t cmd_len = 2; - uint32_t dw1, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); - dw1 = 0; 
- if (blend->alpha_to_coverage && fb->num_samples > 1) - dw1 |= GEN8_PS_BLEND_DW1_ALPHA_TO_COVERAGE; - - if (fb->state.nr_cbufs && fb->state.cbufs[0]) { - const struct ilo_fb_blend_caps *caps = &fb->blend_caps[0]; - - dw1 |= GEN8_PS_BLEND_DW1_WRITABLE_RT; - if (caps->can_blend) { - if (caps->dst_alpha_forced_one) - dw1 |= blend->dw_ps_blend_dst_alpha_forced_one; - else - dw1 |= blend->dw_ps_blend; - } - - if (caps->can_alpha_test) - dw1 |= dsa->dw_ps_blend_alpha; - } else { - dw1 |= dsa->dw_ps_blend_alpha; - } - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_BLEND) | (cmd_len - 2); - dw[1] = dw1; + /* see cc_set_gen8_3DSTATE_PS_BLEND() */ + dw[1] = cc->blend[0]; } static inline void @@ -1282,179 +1259,61 @@ gen6_SCISSOR_RECT(struct ilo_builder *builder, static inline uint32_t gen6_COLOR_CALC_STATE(struct ilo_builder *builder, - const struct pipe_stencil_ref *stencil_ref, - ubyte alpha_ref, - const struct pipe_blend_color *blend_color) + const struct ilo_state_cc *cc) { const int state_align = 64; const int state_len = 6; - uint32_t state_offset, *dw; ILO_DEV_ASSERT(builder->dev, 6, 8); - state_offset = ilo_builder_dynamic_pointer(builder, - ILO_BUILDER_ITEM_COLOR_CALC, state_align, state_len, &dw); - - dw[0] = stencil_ref->ref_value[0] << 24 | - stencil_ref->ref_value[1] << 16 | - GEN6_CC_DW0_ALPHATEST_UNORM8; - dw[1] = alpha_ref; - dw[2] = fui(blend_color->color[0]); - dw[3] = fui(blend_color->color[1]); - dw[4] = fui(blend_color->color[2]); - dw[5] = fui(blend_color->color[3]); - - return state_offset; + /* see cc_params_set_gen6_COLOR_CALC_STATE() */ + return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_COLOR_CALC, + state_align, state_len, cc->cc); } static inline uint32_t gen6_DEPTH_STENCIL_STATE(struct ilo_builder *builder, - const struct ilo_dsa_state *dsa) + const struct ilo_state_cc *cc) { const int state_align = 64; const int state_len = 3; ILO_DEV_ASSERT(builder->dev, 6, 7.5); - 
STATIC_ASSERT(Elements(dsa->payload) >= state_len); - + /* see cc_set_gen6_DEPTH_STENCIL_STATE() */ return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_DEPTH_STENCIL, - state_align, state_len, dsa->payload); + state_align, state_len, cc->ds); } static inline uint32_t gen6_BLEND_STATE(struct ilo_builder *builder, - const struct ilo_blend_state *blend, - const struct ilo_fb_state *fb, - const struct ilo_dsa_state *dsa) + const struct ilo_state_cc *cc) { const int state_align = 64; - int state_len; - uint32_t state_offset, *dw; - unsigned num_targets, i; + const int state_len = 2 * cc->blend_state_count; ILO_DEV_ASSERT(builder->dev, 6, 7.5); - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 376: - * - * "The blend state is stored as an array of up to 8 elements..." - */ - num_targets = fb->state.nr_cbufs; - assert(num_targets <= 8); + if (!state_len) + return 0; - if (!num_targets) { - if (!dsa->dw_blend_alpha) - return 0; - /* to be able to reference alpha func */ - num_targets = 1; - } - - state_len = 2 * num_targets; - - state_offset = ilo_builder_dynamic_pointer(builder, - ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw); - - for (i = 0; i < num_targets; i++) { - const struct ilo_blend_cso *cso = &blend->cso[i]; - - dw[0] = cso->payload[0]; - dw[1] = cso->payload[1] | blend->dw_shared; - - if (i < fb->state.nr_cbufs && fb->state.cbufs[i]) { - const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i]; - - if (caps->can_blend) { - if (caps->dst_alpha_forced_one) - dw[0] |= cso->dw_blend_dst_alpha_forced_one; - else - dw[0] |= cso->dw_blend; - } - - if (caps->can_logicop) - dw[1] |= blend->dw_logicop; - - if (caps->can_alpha_test) - dw[1] |= dsa->dw_blend_alpha; - } else { - dw[1] |= GEN6_RT_DW1_WRITE_DISABLES_A | - GEN6_RT_DW1_WRITE_DISABLES_R | - GEN6_RT_DW1_WRITE_DISABLES_G | - GEN6_RT_DW1_WRITE_DISABLES_B | - dsa->dw_blend_alpha; - } - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 356: - * - * "When NumSamples = 1, AlphaToCoverage 
and AlphaToCoverage - * Dither both must be disabled." - * - * There is no such limitation on GEN7, or for AlphaToOne. But GL - * requires that anyway. - */ - if (fb->num_samples > 1) - dw[1] |= blend->dw_alpha_mod; - - dw += 2; - } - - return state_offset; + /* see cc_set_gen6_BLEND_STATE() */ + return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLEND, + state_align, state_len, cc->blend); } static inline uint32_t gen8_BLEND_STATE(struct ilo_builder *builder, - const struct ilo_blend_state *blend, - const struct ilo_fb_state *fb, - const struct ilo_dsa_state *dsa) + const struct ilo_state_cc *cc) { const int state_align = 64; - const int state_len = 1 + 2 * fb->state.nr_cbufs; - uint32_t state_offset, *dw; - unsigned i; + const int state_len = 1 + 2 * cc->blend_state_count; ILO_DEV_ASSERT(builder->dev, 8, 8); - assert(fb->state.nr_cbufs <= 8); - - state_offset = ilo_builder_dynamic_pointer(builder, - ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw); - - dw[0] = blend->dw_shared; - if (fb->num_samples > 1) - dw[0] |= blend->dw_alpha_mod; - if (!fb->state.nr_cbufs || fb->blend_caps[0].can_alpha_test) - dw[0] |= dsa->dw_blend_alpha; - dw++; - - for (i = 0; i < fb->state.nr_cbufs; i++) { - const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i]; - const struct ilo_blend_cso *cso = &blend->cso[i]; - - dw[0] = cso->payload[0]; - dw[1] = cso->payload[1]; - - if (fb->state.cbufs[i]) { - if (caps->can_blend) { - if (caps->dst_alpha_forced_one) - dw[0] |= cso->dw_blend_dst_alpha_forced_one; - else - dw[0] |= cso->dw_blend; - } - - if (caps->can_logicop) - dw[1] |= blend->dw_logicop; - } else { - dw[0] |= GEN8_RT_DW0_WRITE_DISABLES_A | - GEN8_RT_DW0_WRITE_DISABLES_R | - GEN8_RT_DW0_WRITE_DISABLES_G | - GEN8_RT_DW0_WRITE_DISABLES_B; - } - - dw += 2; - } - - return state_offset; + /* see cc_set_gen8_BLEND_STATE() */ + return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLEND, + state_align, state_len, &cc->blend[1]); } #endif /* ILO_BUILDER_3D_BOTTOM_H */ 
diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index 78cd67128af..45929b2226d 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -101,38 +101,6 @@ struct ilo_so_state { bool enabled; }; -struct ilo_dsa_state { - /* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */ - uint32_t payload[3]; - - uint32_t dw_blend_alpha; - uint32_t dw_ps_blend_alpha; - ubyte alpha_ref; -}; - -struct ilo_blend_cso { - /* BLEND_STATE */ - uint32_t payload[2]; - - uint32_t dw_blend; - uint32_t dw_blend_dst_alpha_forced_one; -}; - -struct ilo_blend_state { - struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS]; - - bool dual_blend; - bool alpha_to_coverage; - - uint32_t dw_shared; - uint32_t dw_alpha_mod; - uint32_t dw_logicop; - - /* a part of 3DSTATE_PS_BLEND */ - uint32_t dw_ps_blend; - uint32_t dw_ps_blend_dst_alpha_forced_one; -}; - struct ilo_surface_cso { struct pipe_surface base; @@ -152,11 +120,11 @@ struct ilo_fb_state { struct ilo_fb_blend_caps { bool is_unorm; bool is_integer; + bool force_dst_alpha_one; bool can_logicop; bool can_blend; bool can_alpha_test; - bool dst_alpha_forced_one; } blend_caps[PIPE_MAX_COLOR_BUFS]; unsigned num_samples; @@ -185,16 +153,6 @@ ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev, int comp0, int comp1, int comp2, int comp3, struct ilo_ve_cso *cso); -void -ilo_gpe_init_dsa(const struct ilo_dev *dev, - const struct pipe_depth_stencil_alpha_state *state, - struct ilo_dsa_state *dsa); - -void -ilo_gpe_init_blend(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - struct ilo_blend_state *blend); - void ilo_gpe_init_vs_cso(const struct ilo_dev *dev, const struct ilo_shader_state *vs, diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c index 83da224811e..932b80dd0aa 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c +++ 
b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c @@ -26,7 +26,6 @@ */ #include "genhw/genhw.h" -#include "util/u_dual_blend.h" #include "util/u_framebuffer.h" #include "util/u_half.h" @@ -314,681 +313,6 @@ ilo_gpe_init_fs_cso(const struct ilo_dev *dev, fs_init_cso_gen6(dev, fs, cso); } -/** - * Translate a pipe logicop to the matching hardware logicop. - */ -static int -gen6_translate_pipe_logicop(unsigned logicop) -{ - switch (logicop) { - case PIPE_LOGICOP_CLEAR: return GEN6_LOGICOP_CLEAR; - case PIPE_LOGICOP_NOR: return GEN6_LOGICOP_NOR; - case PIPE_LOGICOP_AND_INVERTED: return GEN6_LOGICOP_AND_INVERTED; - case PIPE_LOGICOP_COPY_INVERTED: return GEN6_LOGICOP_COPY_INVERTED; - case PIPE_LOGICOP_AND_REVERSE: return GEN6_LOGICOP_AND_REVERSE; - case PIPE_LOGICOP_INVERT: return GEN6_LOGICOP_INVERT; - case PIPE_LOGICOP_XOR: return GEN6_LOGICOP_XOR; - case PIPE_LOGICOP_NAND: return GEN6_LOGICOP_NAND; - case PIPE_LOGICOP_AND: return GEN6_LOGICOP_AND; - case PIPE_LOGICOP_EQUIV: return GEN6_LOGICOP_EQUIV; - case PIPE_LOGICOP_NOOP: return GEN6_LOGICOP_NOOP; - case PIPE_LOGICOP_OR_INVERTED: return GEN6_LOGICOP_OR_INVERTED; - case PIPE_LOGICOP_COPY: return GEN6_LOGICOP_COPY; - case PIPE_LOGICOP_OR_REVERSE: return GEN6_LOGICOP_OR_REVERSE; - case PIPE_LOGICOP_OR: return GEN6_LOGICOP_OR; - case PIPE_LOGICOP_SET: return GEN6_LOGICOP_SET; - default: - assert(!"unknown logicop function"); - return GEN6_LOGICOP_CLEAR; - } -} - -/** - * Translate a pipe blend function to the matching hardware blend function. 
- */ -static int -gen6_translate_pipe_blend(unsigned blend) -{ - switch (blend) { - case PIPE_BLEND_ADD: return GEN6_BLENDFUNCTION_ADD; - case PIPE_BLEND_SUBTRACT: return GEN6_BLENDFUNCTION_SUBTRACT; - case PIPE_BLEND_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT; - case PIPE_BLEND_MIN: return GEN6_BLENDFUNCTION_MIN; - case PIPE_BLEND_MAX: return GEN6_BLENDFUNCTION_MAX; - default: - assert(!"unknown blend function"); - return GEN6_BLENDFUNCTION_ADD; - }; -} - -/** - * Translate a pipe blend factor to the matching hardware blend factor. - */ -static int -gen6_translate_pipe_blendfactor(unsigned blendfactor) -{ - switch (blendfactor) { - case PIPE_BLENDFACTOR_ONE: return GEN6_BLENDFACTOR_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: return GEN6_BLENDFACTOR_SRC_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: return GEN6_BLENDFACTOR_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: return GEN6_BLENDFACTOR_DST_ALPHA; - case PIPE_BLENDFACTOR_DST_COLOR: return GEN6_BLENDFACTOR_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: return GEN6_BLENDFACTOR_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: return GEN6_BLENDFACTOR_CONST_ALPHA; - case PIPE_BLENDFACTOR_SRC1_COLOR: return GEN6_BLENDFACTOR_SRC1_COLOR; - case PIPE_BLENDFACTOR_SRC1_ALPHA: return GEN6_BLENDFACTOR_SRC1_ALPHA; - case PIPE_BLENDFACTOR_ZERO: return GEN6_BLENDFACTOR_ZERO; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: return GEN6_BLENDFACTOR_INV_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return GEN6_BLENDFACTOR_INV_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: return GEN6_BLENDFACTOR_INV_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_COLOR: return GEN6_BLENDFACTOR_INV_DST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: return GEN6_BLENDFACTOR_INV_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return GEN6_BLENDFACTOR_INV_CONST_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return GEN6_BLENDFACTOR_INV_SRC1_COLOR; - case 
PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA; - default: - assert(!"unknown blend factor"); - return GEN6_BLENDFACTOR_ONE; - }; -} - -/** - * Translate a pipe stencil op to the matching hardware stencil op. - */ -static int -gen6_translate_pipe_stencil_op(unsigned stencil_op) -{ - switch (stencil_op) { - case PIPE_STENCIL_OP_KEEP: return GEN6_STENCILOP_KEEP; - case PIPE_STENCIL_OP_ZERO: return GEN6_STENCILOP_ZERO; - case PIPE_STENCIL_OP_REPLACE: return GEN6_STENCILOP_REPLACE; - case PIPE_STENCIL_OP_INCR: return GEN6_STENCILOP_INCRSAT; - case PIPE_STENCIL_OP_DECR: return GEN6_STENCILOP_DECRSAT; - case PIPE_STENCIL_OP_INCR_WRAP: return GEN6_STENCILOP_INCR; - case PIPE_STENCIL_OP_DECR_WRAP: return GEN6_STENCILOP_DECR; - case PIPE_STENCIL_OP_INVERT: return GEN6_STENCILOP_INVERT; - default: - assert(!"unknown stencil op"); - return GEN6_STENCILOP_KEEP; - } -} - -static int -gen6_blend_factor_dst_alpha_forced_one(int factor) -{ - switch (factor) { - case GEN6_BLENDFACTOR_DST_ALPHA: - return GEN6_BLENDFACTOR_ONE; - case GEN6_BLENDFACTOR_INV_DST_ALPHA: - case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE: - return GEN6_BLENDFACTOR_ZERO; - default: - return factor; - } -} - -static uint32_t -blend_get_rt_blend_enable_gen6(const struct ilo_dev *dev, - const struct pipe_rt_blend_state *rt, - bool dst_alpha_forced_one) -{ - int rgb_src, rgb_dst, a_src, a_dst; - uint32_t dw; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - if (!rt->blend_enable) - return 0; - - rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor); - rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor); - a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor); - a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor); - - if (dst_alpha_forced_one) { - rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src); - rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst); - a_src = gen6_blend_factor_dst_alpha_forced_one(a_src); - a_dst = 
gen6_blend_factor_dst_alpha_forced_one(a_dst); - } - - dw = GEN6_RT_DW0_BLEND_ENABLE | - gen6_translate_pipe_blend(rt->alpha_func) << 26 | - a_src << 20 | - a_dst << 15 | - gen6_translate_pipe_blend(rt->rgb_func) << 11 | - rgb_src << 5 | - rgb_dst; - - if (rt->rgb_func != rt->alpha_func || - rgb_src != a_src || rgb_dst != a_dst) - dw |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE; - - return dw; -} - -static uint32_t -blend_get_rt_blend_enable_gen8(const struct ilo_dev *dev, - const struct pipe_rt_blend_state *rt, - bool dst_alpha_forced_one, - bool *independent_alpha) -{ - int rgb_src, rgb_dst, a_src, a_dst; - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (!rt->blend_enable) { - *independent_alpha = false; - return 0; - } - - rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor); - rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor); - a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor); - a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor); - - if (dst_alpha_forced_one) { - rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src); - rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst); - a_src = gen6_blend_factor_dst_alpha_forced_one(a_src); - a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst); - } - - dw = GEN8_RT_DW0_BLEND_ENABLE | - rgb_src << 26 | - rgb_dst << 21 | - gen6_translate_pipe_blend(rt->rgb_func) << 18 | - a_src << 13 | - a_dst << 8 | - gen6_translate_pipe_blend(rt->alpha_func) << 5; - - *independent_alpha = (rt->rgb_func != rt->alpha_func || - rgb_src != a_src || - rgb_dst != a_dst); - - return dw; -} - -static void -blend_init_cso_gen6(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - struct ilo_blend_state *blend, - unsigned index) -{ - const struct pipe_rt_blend_state *rt = &state->rt[index]; - struct ilo_blend_cso *cso = &blend->cso[index]; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - cso->payload[0] = 0; - cso->payload[1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT | - GEN6_RT_DW1_PRE_BLEND_CLAMP | 
- GEN6_RT_DW1_POST_BLEND_CLAMP; - - if (!(rt->colormask & PIPE_MASK_A)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLES_A; - if (!(rt->colormask & PIPE_MASK_R)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLES_R; - if (!(rt->colormask & PIPE_MASK_G)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLES_G; - if (!(rt->colormask & PIPE_MASK_B)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLES_B; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 365: - * - * "Color Buffer Blending and Logic Ops must not be enabled - * simultaneously, or behavior is UNDEFINED." - * - * Since state->logicop_enable takes precedence over rt->blend_enable, - * no special care is needed. - */ - if (state->logicop_enable) { - cso->dw_blend = 0; - cso->dw_blend_dst_alpha_forced_one = 0; - } else { - cso->dw_blend = blend_get_rt_blend_enable_gen6(dev, rt, false); - cso->dw_blend_dst_alpha_forced_one = - blend_get_rt_blend_enable_gen6(dev, rt, true); - } -} - -static bool -blend_init_cso_gen8(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - struct ilo_blend_state *blend, - unsigned index) -{ - const struct pipe_rt_blend_state *rt = &state->rt[index]; - struct ilo_blend_cso *cso = &blend->cso[index]; - bool independent_alpha = false; - - ILO_DEV_ASSERT(dev, 8, 8); - - cso->payload[0] = 0; - cso->payload[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT | - GEN8_RT_DW1_PRE_BLEND_CLAMP | - GEN8_RT_DW1_POST_BLEND_CLAMP; - - if (!(rt->colormask & PIPE_MASK_A)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLES_A; - if (!(rt->colormask & PIPE_MASK_R)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLES_R; - if (!(rt->colormask & PIPE_MASK_G)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLES_G; - if (!(rt->colormask & PIPE_MASK_B)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLES_B; - - if (state->logicop_enable) { - cso->dw_blend = 0; - cso->dw_blend_dst_alpha_forced_one = 0; - } else { - bool tmp[2]; - - cso->dw_blend = blend_get_rt_blend_enable_gen8(dev, rt, false, &tmp[0]); - 
cso->dw_blend_dst_alpha_forced_one = - blend_get_rt_blend_enable_gen8(dev, rt, true, &tmp[1]); - - if (tmp[0] || tmp[1]) - independent_alpha = true; - } - - return independent_alpha; -} - -static uint32_t -blend_get_logicop_enable_gen6(const struct ilo_dev *dev, - const struct pipe_blend_state *state) -{ - ILO_DEV_ASSERT(dev, 6, 7.5); - - if (!state->logicop_enable) - return 0; - - return GEN6_RT_DW1_LOGICOP_ENABLE | - gen6_translate_pipe_logicop(state->logicop_func) << 18; -} - -static uint32_t -blend_get_logicop_enable_gen8(const struct ilo_dev *dev, - const struct pipe_blend_state *state) -{ - ILO_DEV_ASSERT(dev, 8, 8); - - if (!state->logicop_enable) - return 0; - - return GEN8_RT_DW1_LOGICOP_ENABLE | - gen6_translate_pipe_logicop(state->logicop_func) << 27; -} - -static uint32_t -blend_get_alpha_mod_gen6(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - bool dual_blend) -{ - uint32_t dw = 0; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - if (state->alpha_to_coverage) { - dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE; - if (ilo_dev_gen(dev) >= ILO_GEN(7)) - dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER; - } - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 378: - * - * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable) - * must be disabled." 
- */ - if (state->alpha_to_one && !dual_blend) - dw |= GEN6_RT_DW1_ALPHA_TO_ONE; - - return dw; -} - -static uint32_t -blend_get_alpha_mod_gen8(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - bool dual_blend) -{ - uint32_t dw = 0; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (state->alpha_to_coverage) { - dw |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE | - GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER; - } - - if (state->alpha_to_one && !dual_blend) - dw |= GEN8_BLEND_DW0_ALPHA_TO_ONE; - - return dw; -} - -static uint32_t -blend_get_ps_blend_gen8(const struct ilo_dev *dev, uint32_t rt_dw0) -{ - int rgb_src, rgb_dst, a_src, a_dst; - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (!(rt_dw0 & GEN8_RT_DW0_BLEND_ENABLE)) - return 0; - - a_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_ALPHA_FACTOR); - a_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_ALPHA_FACTOR); - rgb_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_COLOR_FACTOR); - rgb_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_COLOR_FACTOR); - - dw = GEN8_PS_BLEND_DW1_BLEND_ENABLE; - dw |= GEN_SHIFT32(a_src, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR); - dw |= GEN_SHIFT32(a_dst, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR); - dw |= GEN_SHIFT32(rgb_src, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR); - dw |= GEN_SHIFT32(rgb_dst, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR); - - if (a_src != rgb_src || a_dst != rgb_dst) - dw |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE; - - return dw; -} - -void -ilo_gpe_init_blend(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - struct ilo_blend_state *blend) -{ - unsigned i; - - ILO_DEV_ASSERT(dev, 6, 8); - - blend->dual_blend = (util_blend_state_is_dual(state, 0) && - state->rt[0].blend_enable && - !state->logicop_enable); - blend->alpha_to_coverage = state->alpha_to_coverage; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - bool independent_alpha; - - blend->dw_alpha_mod = - blend_get_alpha_mod_gen8(dev, state, blend->dual_blend); - blend->dw_logicop = blend_get_logicop_enable_gen8(dev, state); - blend->dw_shared = 
(state->dither) ? GEN8_BLEND_DW0_DITHER_ENABLE : 0; - - independent_alpha = blend_init_cso_gen8(dev, state, blend, 0); - if (independent_alpha) - blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE; - - blend->dw_ps_blend = blend_get_ps_blend_gen8(dev, - blend->cso[0].dw_blend); - blend->dw_ps_blend_dst_alpha_forced_one = blend_get_ps_blend_gen8(dev, - blend->cso[0].dw_blend_dst_alpha_forced_one); - - if (state->independent_blend_enable) { - for (i = 1; i < Elements(blend->cso); i++) { - independent_alpha = blend_init_cso_gen8(dev, state, blend, i); - if (independent_alpha) - blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE; - } - } else { - for (i = 1; i < Elements(blend->cso); i++) - blend->cso[i] = blend->cso[0]; - } - } else { - blend->dw_alpha_mod = - blend_get_alpha_mod_gen6(dev, state, blend->dual_blend); - blend->dw_logicop = blend_get_logicop_enable_gen6(dev, state); - blend->dw_shared = (state->dither) ? GEN6_RT_DW1_DITHER_ENABLE : 0; - - blend->dw_ps_blend = 0; - blend->dw_ps_blend_dst_alpha_forced_one = 0; - - blend_init_cso_gen6(dev, state, blend, 0); - if (state->independent_blend_enable) { - for (i = 1; i < Elements(blend->cso); i++) - blend_init_cso_gen6(dev, state, blend, i); - } else { - for (i = 1; i < Elements(blend->cso); i++) - blend->cso[i] = blend->cso[0]; - } - } -} - -/** - * Translate a pipe DSA test function to the matching hardware compare - * function. 
- */ -static int -gen6_translate_dsa_func(unsigned func) -{ - switch (func) { - case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_NEVER; - case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LESS; - case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL; - case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LEQUAL; - case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GREATER; - case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; - case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GEQUAL; - case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS; - default: - assert(!"unknown depth/stencil/alpha test function"); - return GEN6_COMPAREFUNCTION_NEVER; - } -} - -static uint32_t -dsa_get_stencil_enable_gen6(const struct ilo_dev *dev, - const struct pipe_stencil_state *stencil0, - const struct pipe_stencil_state *stencil1) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - if (!stencil0->enabled) - return 0; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 359: - * - * "If the Depth Buffer is either undefined or does not have a surface - * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate - * stencil buffer is disabled, Stencil Test Enable must be DISABLED" - * - * From the Sandy Bridge PRM, volume 2 part 1, page 370: - * - * "This field (Stencil Test Enable) cannot be enabled if - * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM." - * - * TODO We do not check these yet. 
- */ - dw = GEN6_ZS_DW0_STENCIL_TEST_ENABLE | - gen6_translate_dsa_func(stencil0->func) << 28 | - gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 | - gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 | - gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19; - if (stencil0->writemask) - dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE; - - if (stencil1->enabled) { - dw |= GEN6_ZS_DW0_STENCIL1_ENABLE | - gen6_translate_dsa_func(stencil1->func) << 12 | - gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 | - gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 | - gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3; - if (stencil1->writemask) - dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE; - } - - return dw; -} - -static uint32_t -dsa_get_stencil_enable_gen8(const struct ilo_dev *dev, - const struct pipe_stencil_state *stencil0, - const struct pipe_stencil_state *stencil1) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (!stencil0->enabled) - return 0; - - dw = gen6_translate_pipe_stencil_op(stencil0->fail_op) << 29 | - gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 26 | - gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 23 | - gen6_translate_dsa_func(stencil0->func) << 8 | - GEN8_ZS_DW1_STENCIL_TEST_ENABLE; - if (stencil0->writemask) - dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE; - - if (stencil1->enabled) { - dw |= gen6_translate_dsa_func(stencil1->func) << 20 | - gen6_translate_pipe_stencil_op(stencil1->fail_op) << 17 | - gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 14 | - gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 11 | - GEN8_ZS_DW1_STENCIL1_ENABLE; - if (stencil1->writemask) - dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE; - } - - return dw; -} - -static uint32_t -dsa_get_depth_enable_gen6(const struct ilo_dev *dev, - const struct pipe_depth_state *state) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 360: - * - * "Enabling the Depth Test function without 
defining a Depth Buffer is - * UNDEFINED." - * - * From the Sandy Bridge PRM, volume 2 part 1, page 375: - * - * "A Depth Buffer must be defined before enabling writes to it, or - * operation is UNDEFINED." - * - * TODO We do not check these yet. - */ - if (state->enabled) { - dw = GEN6_ZS_DW2_DEPTH_TEST_ENABLE | - gen6_translate_dsa_func(state->func) << 27; - } else { - dw = GEN6_COMPAREFUNCTION_ALWAYS << 27; - } - - if (state->writemask) - dw |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE; - - return dw; -} - -static uint32_t -dsa_get_depth_enable_gen8(const struct ilo_dev *dev, - const struct pipe_depth_state *state) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (state->enabled) { - dw = GEN8_ZS_DW1_DEPTH_TEST_ENABLE | - gen6_translate_dsa_func(state->func) << 5; - } else { - dw = GEN6_COMPAREFUNCTION_ALWAYS << 5; - } - - if (state->writemask) - dw |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE; - - return dw; -} - -static uint32_t -dsa_get_alpha_enable_gen6(const struct ilo_dev *dev, - const struct pipe_alpha_state *state) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - if (!state->enabled) - return 0; - - /* this will be ORed to BLEND_STATE */ - dw = GEN6_RT_DW1_ALPHA_TEST_ENABLE | - gen6_translate_dsa_func(state->func) << 13; - - return dw; -} - -static uint32_t -dsa_get_alpha_enable_gen8(const struct ilo_dev *dev, - const struct pipe_alpha_state *state) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (!state->enabled) - return 0; - - /* this will be ORed to BLEND_STATE */ - dw = GEN8_BLEND_DW0_ALPHA_TEST_ENABLE | - gen6_translate_dsa_func(state->func) << 24; - - return dw; -} - -void -ilo_gpe_init_dsa(const struct ilo_dev *dev, - const struct pipe_depth_stencil_alpha_state *state, - struct ilo_dsa_state *dsa) -{ - ILO_DEV_ASSERT(dev, 6, 8); - - STATIC_ASSERT(Elements(dsa->payload) >= 3); - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - const uint32_t dw_stencil = dsa_get_stencil_enable_gen8(dev, - &state->stencil[0], &state->stencil[1]); - const uint32_t dw_depth 
= dsa_get_depth_enable_gen8(dev, &state->depth); - - assert(!(dw_stencil & dw_depth)); - dsa->payload[0] = dw_stencil | dw_depth; - - dsa->dw_blend_alpha = dsa_get_alpha_enable_gen8(dev, &state->alpha); - dsa->dw_ps_blend_alpha = (state->alpha.enabled) ? - GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE : 0; - } else { - dsa->payload[0] = dsa_get_stencil_enable_gen6(dev, - &state->stencil[0], &state->stencil[1]); - dsa->payload[2] = dsa_get_depth_enable_gen6(dev, &state->depth); - - dsa->dw_blend_alpha = dsa_get_alpha_enable_gen6(dev, &state->alpha); - dsa->dw_ps_blend_alpha = 0; - } - - dsa->payload[1] = state->stencil[0].valuemask << 24 | - state->stencil[0].writemask << 16 | - state->stencil[1].valuemask << 8 | - state->stencil[1].writemask; - - dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value); -} - static void fb_set_blend_caps(const struct ilo_dev *dev, enum pipe_format format, @@ -1029,12 +353,12 @@ fb_set_blend_caps(const struct ilo_dev *dev, */ caps->can_alpha_test = !caps->is_integer; - caps->dst_alpha_forced_one = + caps->force_dst_alpha_one = (ilo_format_translate_render(dev, format) != ilo_format_translate_color(dev, format)); /* sanity check */ - if (caps->dst_alpha_forced_one) { + if (caps->force_dst_alpha_one) { enum pipe_format render_format; switch (format) { diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index a092aff1993..072f0f7f7fc 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -64,13 +64,7 @@ struct ilo_blitter { struct ilo_state_viewport vp; uint32_t vp_data[20]; - struct ilo_dsa_state dsa; - - struct { - struct pipe_stencil_ref stencil_ref; - ubyte alpha_ref; - struct pipe_blend_color blend_color; - } cc; + struct ilo_state_cc cc; uint32_t depth_clear_value; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 51e640d5236..9d431956314 100644 --- 
a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -115,18 +115,18 @@ ilo_blitter_set_rectlist(struct ilo_blitter *blitter, } static void -ilo_blitter_set_clear_values(struct ilo_blitter *blitter, - uint32_t depth, ubyte stencil) +ilo_blitter_set_depth_clear_value(struct ilo_blitter *blitter, + uint32_t depth) { blitter->depth_clear_value = depth; - blitter->cc.stencil_ref.ref_value[0] = stencil; } static void -ilo_blitter_set_dsa(struct ilo_blitter *blitter, - const struct pipe_depth_stencil_alpha_state *state) +ilo_blitter_set_cc(struct ilo_blitter *blitter, + const struct ilo_state_cc_info *info) { - ilo_gpe_init_dsa(blitter->ilo->dev, state, &blitter->dsa); + memset(&blitter->cc, 0, sizeof(blitter->cc)); + ilo_state_cc_init(&blitter->cc, blitter->ilo->dev, info); } static void @@ -337,7 +337,7 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter, double depth, unsigned stencil) { struct ilo_texture *tex = ilo_texture(zs->texture); - struct pipe_depth_stencil_alpha_state dsa_state; + struct ilo_state_cc_info info; uint32_t uses, clear_value; if (!ilo_image_can_enable_aux(&tex->image, zs->u.tex.level)) @@ -377,17 +377,20 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter, * - [DevSNB] errata: For stencil buffer only clear, the previous * depth clear value must be delivered during the clear." 
*/ - memset(&dsa_state, 0, sizeof(dsa_state)); + memset(&info, 0, sizeof(info)); - if (clear_flags & PIPE_CLEAR_DEPTH) - dsa_state.depth.writemask = true; + if (clear_flags & PIPE_CLEAR_DEPTH) { + info.depth.cv_has_buffer = true; + info.depth.write_enable = true; + } if (clear_flags & PIPE_CLEAR_STENCIL) { - dsa_state.stencil[0].enabled = true; - dsa_state.stencil[0].func = PIPE_FUNC_ALWAYS; - dsa_state.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; - dsa_state.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; - dsa_state.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; + info.stencil.cv_has_buffer = true; + info.stencil.test_enable = true; + info.stencil.front.test_func = GEN6_COMPAREFUNCTION_ALWAYS; + info.stencil.front.fail_op = GEN6_STENCILOP_KEEP; + info.stencil.front.zfail_op = GEN6_STENCILOP_KEEP; + info.stencil.front.zpass_op = GEN6_STENCILOP_REPLACE; /* * From the Ivy Bridge PRM, volume 2 part 1, page 277: @@ -398,11 +401,12 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter, * - DEPTH_STENCIL_STATE::Stencil Test Mask must be 0xFF * - DEPTH_STENCIL_STATE::Back Face Stencil Write Mask must be 0xFF * - DEPTH_STENCIL_STATE::Back Face Stencil Test Mask must be 0xFF" + * + * Back frace masks will be copied from front face masks. 
*/ - dsa_state.stencil[0].valuemask = 0xff; - dsa_state.stencil[0].writemask = 0xff; - dsa_state.stencil[1].valuemask = 0xff; - dsa_state.stencil[1].writemask = 0xff; + info.params.stencil_front.test_ref = (uint8_t) stencil; + info.params.stencil_front.test_mask = 0xff; + info.params.stencil_front.write_mask = 0xff; } ilo_blitter_set_invariants(blitter); @@ -410,8 +414,8 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter, ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR, clear_flags & PIPE_CLEAR_STENCIL); - ilo_blitter_set_dsa(blitter, &dsa_state); - ilo_blitter_set_clear_values(blitter, clear_value, (ubyte) stencil); + ilo_blitter_set_cc(blitter, &info); + ilo_blitter_set_depth_clear_value(blitter, clear_value); ilo_blitter_set_fb_from_surface(blitter, zs); uses = ILO_BLITTER_USE_DSA; @@ -432,7 +436,7 @@ ilo_blitter_rectlist_resolve_z(struct ilo_blitter *blitter, unsigned level, unsigned slice) { struct ilo_texture *tex = ilo_texture(res); - struct pipe_depth_stencil_alpha_state dsa_state; + struct ilo_state_cc_info info; const struct ilo_texture_slice *s = ilo_texture_get_slice(tex, level, slice); @@ -446,17 +450,18 @@ ilo_blitter_rectlist_resolve_z(struct ilo_blitter *blitter, * to NEVER. Depth Buffer Write Enable must be enabled. Stencil Test * Enable and Stencil Buffer Write Enable must be disabled." 
*/ - memset(&dsa_state, 0, sizeof(dsa_state)); - dsa_state.depth.writemask = true; - dsa_state.depth.enabled = true; - dsa_state.depth.func = PIPE_FUNC_NEVER; + memset(&info, 0, sizeof(info)); + info.depth.cv_has_buffer = true; + info.depth.test_enable = true; + info.depth.write_enable = true; + info.depth.test_func = GEN6_COMPAREFUNCTION_NEVER; ilo_blitter_set_invariants(blitter); ilo_blitter_set_earlyz_op(blitter, ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE, false); - ilo_blitter_set_dsa(blitter, &dsa_state); - ilo_blitter_set_clear_values(blitter, s->clear_value, 0); + ilo_blitter_set_cc(blitter, &info); + ilo_blitter_set_depth_clear_value(blitter, s->clear_value); ilo_blitter_set_fb_from_resource(blitter, res, res->format, level, slice); ilo_blitter_set_uses(blitter, ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH); @@ -470,7 +475,7 @@ ilo_blitter_rectlist_resolve_hiz(struct ilo_blitter *blitter, unsigned level, unsigned slice) { struct ilo_texture *tex = ilo_texture(res); - struct pipe_depth_stencil_alpha_state dsa_state; + struct ilo_state_cc_info info; if (!ilo_image_can_enable_aux(&tex->image, level)) return; @@ -482,14 +487,15 @@ ilo_blitter_rectlist_resolve_hiz(struct ilo_blitter *blitter, * disabled. Depth Buffer Write Enable must be enabled. Stencil Test * Enable and Stencil Buffer Write Enable must be disabled." 
*/ - memset(&dsa_state, 0, sizeof(dsa_state)); - dsa_state.depth.writemask = true; + memset(&info, 0, sizeof(info)); + info.depth.cv_has_buffer = true; + info.depth.write_enable = true; ilo_blitter_set_invariants(blitter); ilo_blitter_set_earlyz_op(blitter, ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE, false); - ilo_blitter_set_dsa(blitter, &dsa_state); + ilo_blitter_set_cc(blitter, &info); ilo_blitter_set_fb_from_resource(blitter, res, res->format, level, slice); ilo_blitter_set_uses(blitter, ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH); diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c index f71059857c6..6935138f8d9 100644 --- a/src/gallium/drivers/ilo/ilo_render.c +++ b/src/gallium/drivers/ilo/ilo_render.c @@ -453,6 +453,9 @@ draw_session_prepare(struct ilo_render *render, ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev, &session->vp_delta); + + ilo_state_cc_full_delta(&vec->blend->cc, render->dev, + &session->cc_delta); } else { session->prim_changed = (render->state.reduced_prim != session->reduced_prim); @@ -468,6 +471,11 @@ draw_session_prepare(struct ilo_render *render, ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev, &session->vp_delta); } + + if (vec->dirty & ILO_DIRTY_BLEND) { + ilo_state_cc_get_delta(&vec->blend->cc, render->dev, + &render->state.cc, &session->cc_delta); + } } } @@ -486,6 +494,7 @@ draw_session_end(struct ilo_render *render, render->state.primitive_restart = vec->draw->primitive_restart; render->state.rs = vec->rasterizer->rs; + render->state.cc = vec->blend->cc; } void diff --git a/src/gallium/drivers/ilo/ilo_render_dynamic.c b/src/gallium/drivers/ilo/ilo_render_dynamic.c index cc3791eb470..5618920a507 100644 --- a/src/gallium/drivers/ilo/ilo_render_dynamic.c +++ b/src/gallium/drivers/ilo/ilo_render_dynamic.c @@ -99,32 +99,30 @@ gen6_emit_draw_dynamic_cc(struct ilo_render *r, ILO_DEV_ASSERT(r->dev, 6, 8); /* BLEND_STATE */ - if (DIRTY(BLEND) || DIRTY(FB) || DIRTY(DSA)) { - if 
(ilo_dev_gen(r->dev) >= ILO_GEN(8)) { - r->state.BLEND_STATE = gen8_BLEND_STATE(r->builder, - vec->blend, &vec->fb, vec->dsa); - } else { - r->state.BLEND_STATE = gen6_BLEND_STATE(r->builder, - vec->blend, &vec->fb, vec->dsa); - } + if ((session->cc_delta.dirty & ILO_STATE_CC_BLEND_STATE) || + r->state_bo_changed) { + if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) + r->state.BLEND_STATE = gen8_BLEND_STATE(r->builder, &vec->blend->cc); + else + r->state.BLEND_STATE = gen6_BLEND_STATE(r->builder, &vec->blend->cc); session->blend_changed = true; } /* COLOR_CALC_STATE */ - if (DIRTY(DSA) || DIRTY(STENCIL_REF) || DIRTY(BLEND_COLOR)) { + if ((session->cc_delta.dirty & ILO_STATE_CC_COLOR_CALC_STATE) || + r->state_bo_changed) { r->state.COLOR_CALC_STATE = - gen6_COLOR_CALC_STATE(r->builder, &vec->stencil_ref, - vec->dsa->alpha_ref, &vec->blend_color); - + gen6_COLOR_CALC_STATE(r->builder, &vec->blend->cc); session->cc_changed = true; } /* DEPTH_STENCIL_STATE */ - if (ilo_dev_gen(r->dev) < ILO_GEN(8) && DIRTY(DSA)) { + if (ilo_dev_gen(r->dev) < ILO_GEN(8) && + ((session->cc_delta.dirty & ILO_STATE_CC_DEPTH_STENCIL_STATE) || + r->state_bo_changed)) { r->state.DEPTH_STENCIL_STATE = - gen6_DEPTH_STENCIL_STATE(r->builder, vec->dsa); - + gen6_DEPTH_STENCIL_STATE(r->builder, &vec->blend->cc); session->dsa_changed = true; } } @@ -450,13 +448,12 @@ ilo_render_emit_rectlist_dynamic_states(struct ilo_render *render, if (blitter->uses & ILO_BLITTER_USE_DSA) { render->state.DEPTH_STENCIL_STATE = - gen6_DEPTH_STENCIL_STATE(render->builder, &blitter->dsa); + gen6_DEPTH_STENCIL_STATE(render->builder, &blitter->cc); } if (blitter->uses & ILO_BLITTER_USE_CC) { render->state.COLOR_CALC_STATE = - gen6_COLOR_CALC_STATE(render->builder, &blitter->cc.stencil_ref, - blitter->cc.alpha_ref, &blitter->cc.blend_color); + gen6_COLOR_CALC_STATE(render->builder, &blitter->cc); } if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) { diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h 
b/src/gallium/drivers/ilo/ilo_render_gen.h index 2bf51724733..cc6f77d9750 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -91,6 +91,7 @@ struct ilo_render { int so_max_vertices; struct ilo_state_raster rs; + struct ilo_state_cc cc; uint32_t SF_VIEWPORT; uint32_t CLIP_VIEWPORT; @@ -149,6 +150,7 @@ struct ilo_render_draw_session { struct ilo_state_raster_delta rs_delta; struct ilo_state_viewport_delta vp_delta; + struct ilo_state_cc_delta cc_delta; /* dynamic states */ bool viewport_changed; diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index e292ae8f3f9..ff0bf2fb820 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -29,7 +29,6 @@ #include "core/ilo_builder_3d.h" #include "core/ilo_builder_mi.h" #include "core/ilo_builder_render.h" -#include "util/u_dual_blend.h" #include "util/u_prim.h" #include "ilo_blitter.h" @@ -679,18 +678,14 @@ gen6_draw_wm(struct ilo_render *r, } /* 3DSTATE_WM */ - if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || + if (DIRTY(FS) || DIRTY(BLEND) || (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) || r->instruction_bo_changed) { - const bool dual_blend = vec->blend->dual_blend; - const bool cc_may_kill = (vec->dsa->dw_blend_alpha || - vec->blend->alpha_to_coverage); - if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed) gen6_wa_pre_3dstate_wm_max_threads(r); gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs, vec->fs, - dual_blend, cc_may_kill); + vec->blend->dual_blend, vec->blend->alpha_may_kill); } } diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index b427b2920f8..0931a771876 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -28,7 +28,6 @@ #include "genhw/genhw.h" #include "core/ilo_builder_3d.h" #include "core/ilo_builder_render.h" -#include "util/u_dual_blend.h" 
#include "ilo_blitter.h" #include "ilo_shader.h" @@ -497,12 +496,10 @@ gen7_draw_wm(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_WM */ - if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || + if (DIRTY(FS) || DIRTY(BLEND) || (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM)) { - const bool cc_may_kill = (vec->dsa->dw_blend_alpha || - vec->blend->alpha_to_coverage); - - gen7_3DSTATE_WM(r->builder, &vec->rasterizer->rs, vec->fs, cc_may_kill); + gen7_3DSTATE_WM(r->builder, &vec->rasterizer->rs, vec->fs, + vec->blend->alpha_may_kill); } /* 3DSTATE_BINDING_TABLE_POINTERS_PS */ diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 7afb35e8b6b..e0e1a854ebc 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -28,7 +28,6 @@ #include "genhw/genhw.h" #include "core/ilo_builder_3d.h" #include "core/ilo_builder_render.h" -#include "util/u_dual_blend.h" #include "ilo_blitter.h" #include "ilo_shader.h" @@ -93,8 +92,8 @@ gen8_draw_wm(struct ilo_render *r, if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) gen8_3DSTATE_WM(r->builder, &vec->rasterizer->rs); - if (DIRTY(DSA)) - gen8_3DSTATE_WM_DEPTH_STENCIL(r->builder, vec->dsa); + if (session->cc_delta.dirty & ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL) + gen8_3DSTATE_WM_DEPTH_STENCIL(r->builder, &vec->blend->cc); /* 3DSTATE_WM_HZ_OP and 3DSTATE_WM_CHROMAKEY */ if (r->hw_ctx_changed) { @@ -127,15 +126,14 @@ gen8_draw_wm(struct ilo_render *r, gen8_3DSTATE_PS(r->builder, vec->fs); /* 3DSTATE_PS_EXTRA */ - if (DIRTY(FS) || DIRTY(DSA) || DIRTY(BLEND)) { - const bool cc_may_kill = (vec->dsa->dw_blend_alpha || - vec->blend->alpha_to_coverage); - gen8_3DSTATE_PS_EXTRA(r->builder, vec->fs, cc_may_kill, false); + if (DIRTY(FS) || DIRTY(BLEND)) { + gen8_3DSTATE_PS_EXTRA(r->builder, vec->fs, + vec->blend->alpha_may_kill, false); } /* 3DSTATE_PS_BLEND */ - if (DIRTY(BLEND) || DIRTY(FB) || DIRTY(DSA)) - 
gen8_3DSTATE_PS_BLEND(r->builder, vec->blend, &vec->fb, vec->dsa); + if (session->cc_delta.dirty & ILO_STATE_CC_3DSTATE_PS_BLEND) + gen8_3DSTATE_PS_BLEND(r->builder, &vec->blend->cc); /* 3DSTATE_SCISSOR_STATE_POINTERS */ if (session->scissor_changed) { diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 048152158eb..0a568bfbdc2 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -28,6 +28,7 @@ #include "core/ilo_builder_3d.h" /* for gen6_3d_translate_pipe_prim() */ #include "core/ilo_format.h" #include "core/ilo_state_3d.h" +#include "util/u_dual_blend.h" #include "util/u_dynarray.h" #include "util/u_helpers.h" #include "util/u_resource.h" @@ -160,6 +161,112 @@ ilo_translate_half_pixel_center(bool half_pixel_center) return (half_pixel_center) ? GEN6_PIXLOC_CENTER : GEN6_PIXLOC_UL_CORNER; } +static enum gen_compare_function +ilo_translate_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LESS; + case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS; + default: + assert(!"unknown compare function"); + return GEN6_COMPAREFUNCTION_NEVER; + } +} + +static enum gen_stencil_op +ilo_translate_stencil_op(unsigned stencil_op) +{ + switch (stencil_op) { + case PIPE_STENCIL_OP_KEEP: return GEN6_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: return GEN6_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: return GEN6_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: return GEN6_STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: return GEN6_STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: 
return GEN6_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: return GEN6_STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: return GEN6_STENCILOP_INVERT; + default: + assert(!"unknown stencil op"); + return GEN6_STENCILOP_KEEP; + } +} + +static enum gen_logic_op +ilo_translate_logicop(unsigned logicop) +{ + switch (logicop) { + case PIPE_LOGICOP_CLEAR: return GEN6_LOGICOP_CLEAR; + case PIPE_LOGICOP_NOR: return GEN6_LOGICOP_NOR; + case PIPE_LOGICOP_AND_INVERTED: return GEN6_LOGICOP_AND_INVERTED; + case PIPE_LOGICOP_COPY_INVERTED: return GEN6_LOGICOP_COPY_INVERTED; + case PIPE_LOGICOP_AND_REVERSE: return GEN6_LOGICOP_AND_REVERSE; + case PIPE_LOGICOP_INVERT: return GEN6_LOGICOP_INVERT; + case PIPE_LOGICOP_XOR: return GEN6_LOGICOP_XOR; + case PIPE_LOGICOP_NAND: return GEN6_LOGICOP_NAND; + case PIPE_LOGICOP_AND: return GEN6_LOGICOP_AND; + case PIPE_LOGICOP_EQUIV: return GEN6_LOGICOP_EQUIV; + case PIPE_LOGICOP_NOOP: return GEN6_LOGICOP_NOOP; + case PIPE_LOGICOP_OR_INVERTED: return GEN6_LOGICOP_OR_INVERTED; + case PIPE_LOGICOP_COPY: return GEN6_LOGICOP_COPY; + case PIPE_LOGICOP_OR_REVERSE: return GEN6_LOGICOP_OR_REVERSE; + case PIPE_LOGICOP_OR: return GEN6_LOGICOP_OR; + case PIPE_LOGICOP_SET: return GEN6_LOGICOP_SET; + default: + assert(!"unknown logicop function"); + return GEN6_LOGICOP_CLEAR; + } +} + +static int +ilo_translate_blend_func(unsigned blend) +{ + switch (blend) { + case PIPE_BLEND_ADD: return GEN6_BLENDFUNCTION_ADD; + case PIPE_BLEND_SUBTRACT: return GEN6_BLENDFUNCTION_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT; + case PIPE_BLEND_MIN: return GEN6_BLENDFUNCTION_MIN; + case PIPE_BLEND_MAX: return GEN6_BLENDFUNCTION_MAX; + default: + assert(!"unknown blend function"); + return GEN6_BLENDFUNCTION_ADD; + } +} + +static int +ilo_translate_blend_factor(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: return GEN6_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: return GEN6_BLENDFACTOR_SRC_COLOR; + 
case PIPE_BLENDFACTOR_SRC_ALPHA: return GEN6_BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: return GEN6_BLENDFACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: return GEN6_BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: return GEN6_BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: return GEN6_BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: return GEN6_BLENDFACTOR_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: return GEN6_BLENDFACTOR_SRC1_ALPHA; + case PIPE_BLENDFACTOR_ZERO: return GEN6_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: return GEN6_BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return GEN6_BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: return GEN6_BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: return GEN6_BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: return GEN6_BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return GEN6_BLENDFACTOR_INV_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return GEN6_BLENDFACTOR_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA; + default: + assert(!"unknown blend factor"); + return GEN6_BLENDFACTOR_ONE; + } +} + static void finalize_shader_states(struct ilo_state_vector *vec) { @@ -466,6 +573,98 @@ finalize_rasterizer(struct ilo_context *ilo) } } +static bool +finalize_blend_rt(struct ilo_context *ilo) +{ + struct ilo_state_vector *vec = &ilo->state_vector; + const struct ilo_fb_state *fb = &vec->fb; + struct ilo_blend_state *blend = vec->blend; + struct ilo_state_cc_blend_info *info = &vec->blend->info.blend; + bool changed = false; + unsigned i; + + if (!(vec->dirty & (ILO_DIRTY_FB | ILO_DIRTY_BLEND))) + return false; + + /* set up one for dummy RT writes */ + if (!fb->state.nr_cbufs) { + if (info->rt != &blend->dummy_rt) { + 
info->rt = &blend->dummy_rt; + info->rt_count = 1; + changed = true; + } + + return changed; + } + + if (info->rt != blend->effective_rt || + info->rt_count != fb->state.nr_cbufs) { + info->rt = blend->effective_rt; + info->rt_count = fb->state.nr_cbufs; + changed = true; + } + + for (i = 0; i < fb->state.nr_cbufs; i++) { + const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i]; + struct ilo_state_cc_blend_rt_info *rt = &blend->effective_rt[i]; + /* ignore logicop when not UNORM */ + const bool logicop_enable = + (blend->rt[i].logicop_enable && caps->is_unorm); + + if (rt->cv_is_unorm != caps->is_unorm || + rt->cv_is_integer != caps->is_integer || + rt->logicop_enable != logicop_enable || + rt->force_dst_alpha_one != caps->force_dst_alpha_one) { + rt->cv_is_unorm = caps->is_unorm; + rt->cv_is_integer = caps->is_integer; + rt->logicop_enable = logicop_enable; + rt->force_dst_alpha_one = caps->force_dst_alpha_one; + + changed = true; + } + } + + return changed; +} + +static void +finalize_blend(struct ilo_context *ilo) +{ + const struct ilo_dev *dev = ilo->dev; + struct ilo_state_vector *vec = &ilo->state_vector; + struct ilo_blend_state *blend = vec->blend; + struct ilo_state_cc_info *info = &blend->info; + const bool sample_count_one = (vec->fb.num_samples <= 1); + const bool float_source0_alpha = + (!vec->fb.state.nr_cbufs || !vec->fb.state.cbufs[0] || + !util_format_is_pure_integer(vec->fb.state.cbufs[0]->format)); + + /* check for non-orthogonal states */ + if (finalize_blend_rt(ilo) || + info->alpha.cv_sample_count_one != sample_count_one || + info->alpha.cv_float_source0_alpha != float_source0_alpha || + info->alpha.test_enable != vec->dsa->alpha_test || + info->alpha.test_func != vec->dsa->alpha_func || + memcmp(&info->stencil, &vec->dsa->stencil, sizeof(info->stencil)) || + memcmp(&info->depth, &vec->dsa->depth, sizeof(info->depth)) || + memcmp(&info->params, &vec->cc_params, sizeof(info->params))) { + info->alpha.cv_sample_count_one = sample_count_one; + 
info->alpha.cv_float_source0_alpha = float_source0_alpha; + info->alpha.test_enable = vec->dsa->alpha_test; + info->alpha.test_func = vec->dsa->alpha_func; + info->stencil = vec->dsa->stencil; + info->depth = vec->dsa->depth; + info->params = vec->cc_params; + + ilo_state_cc_set_info(&blend->cc, dev, info); + + blend->alpha_may_kill = (info->alpha.alpha_to_coverage || + info->alpha.test_enable); + + vec->dirty |= ILO_DIRTY_BLEND; + } +} + /** * Finalize states. Some states depend on other states and are * incomplete/invalid until finalized. @@ -483,6 +682,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo, finalize_rasterizer(ilo); finalize_viewport(ilo); + finalize_blend(ilo); u_upload_unmap(ilo->uploader); } @@ -526,12 +726,79 @@ ilo_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *state) { const struct ilo_dev *dev = ilo_context(pipe)->dev; + struct ilo_state_cc_info *info; struct ilo_blend_state *blend; + int i; - blend = MALLOC_STRUCT(ilo_blend_state); + blend = CALLOC_STRUCT(ilo_blend_state); assert(blend); - ilo_gpe_init_blend(dev, state, blend); + info = &blend->info; + + info->alpha.cv_float_source0_alpha = true; + info->alpha.cv_sample_count_one = true; + info->alpha.alpha_to_one = state->alpha_to_one; + info->alpha.alpha_to_coverage = state->alpha_to_coverage; + info->alpha.test_enable = false; + info->alpha.test_func = GEN6_COMPAREFUNCTION_ALWAYS; + + info->stencil.cv_has_buffer = true; + info->depth.cv_has_buffer= true; + + info->blend.rt = blend->effective_rt; + info->blend.rt_count = 1; + info->blend.dither_enable = state->dither; + + for (i = 0; i < ARRAY_SIZE(blend->rt); i++) { + const struct pipe_rt_blend_state *rt = &state->rt[i]; + struct ilo_state_cc_blend_rt_info *rt_info = &blend->rt[i]; + + rt_info->cv_has_buffer = true; + rt_info->cv_is_unorm = true; + rt_info->cv_is_integer = false; + + /* logic op takes precedence over blending */ + if (state->logicop_enable) { + rt_info->logicop_enable = true; + 
rt_info->logicop_func = ilo_translate_logicop(state->logicop_func); + } else if (rt->blend_enable) { + rt_info->blend_enable = true; + + rt_info->rgb_src = ilo_translate_blend_factor(rt->rgb_src_factor); + rt_info->rgb_dst = ilo_translate_blend_factor(rt->rgb_dst_factor); + rt_info->rgb_func = ilo_translate_blend_func(rt->rgb_func); + + rt_info->a_src = ilo_translate_blend_factor(rt->alpha_src_factor); + rt_info->a_dst = ilo_translate_blend_factor(rt->alpha_dst_factor); + rt_info->a_func = ilo_translate_blend_func(rt->alpha_func); + } + + if (!(rt->colormask & PIPE_MASK_A)) + rt_info->argb_write_disables |= (1 << 3); + if (!(rt->colormask & PIPE_MASK_R)) + rt_info->argb_write_disables |= (1 << 2); + if (!(rt->colormask & PIPE_MASK_G)) + rt_info->argb_write_disables |= (1 << 1); + if (!(rt->colormask & PIPE_MASK_B)) + rt_info->argb_write_disables |= (1 << 0); + + if (!state->independent_blend_enable) { + for (i = 1; i < ARRAY_SIZE(blend->rt); i++) + blend->rt[i] = *rt_info; + break; + } + } + + memcpy(blend->effective_rt, blend->rt, sizeof(blend->rt)); + + blend->dummy_rt.argb_write_disables = 0xf; + + if (!ilo_state_cc_init(&blend->cc, dev, &blend->info)) { + FREE(blend); + return NULL; + } + + blend->dual_blend = util_blend_state_is_dual(state, 0); return blend; } @@ -814,13 +1081,48 @@ static void * ilo_create_depth_stencil_alpha_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *state) { - const struct ilo_dev *dev = ilo_context(pipe)->dev; struct ilo_dsa_state *dsa; + int i; - dsa = MALLOC_STRUCT(ilo_dsa_state); + dsa = CALLOC_STRUCT(ilo_dsa_state); assert(dsa); - ilo_gpe_init_dsa(dev, state, dsa); + dsa->depth.cv_has_buffer = true; + dsa->depth.test_enable = state->depth.enabled; + dsa->depth.write_enable = state->depth.writemask; + dsa->depth.test_func = ilo_translate_compare_func(state->depth.func); + + dsa->stencil.cv_has_buffer = true; + for (i = 0; i < ARRAY_SIZE(state->stencil); i++) { + const struct pipe_stencil_state *stencil 
= &state->stencil[i]; + struct ilo_state_cc_stencil_op_info *op; + + if (!stencil->enabled) + break; + + if (i == 0) { + dsa->stencil.test_enable = true; + dsa->stencil_front.test_mask = stencil->valuemask; + dsa->stencil_front.write_mask = stencil->writemask; + + op = &dsa->stencil.front; + } else { + dsa->stencil.twosided_enable = true; + dsa->stencil_back.test_mask = stencil->valuemask; + dsa->stencil_back.write_mask = stencil->writemask; + + op = &dsa->stencil.back; + } + + op->test_func = ilo_translate_compare_func(stencil->func); + op->fail_op = ilo_translate_stencil_op(stencil->fail_op); + op->zfail_op = ilo_translate_stencil_op(stencil->zfail_op); + op->zpass_op = ilo_translate_stencil_op(stencil->zpass_op); + } + + dsa->alpha_test = state->alpha.enabled; + dsa->alpha_ref = state->alpha.ref_value; + dsa->alpha_func = ilo_translate_compare_func(state->alpha.func); return dsa; } @@ -831,6 +1133,17 @@ ilo_bind_depth_stencil_alpha_state(struct pipe_context *pipe, void *state) struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector; vec->dsa = state; + if (vec->dsa) { + vec->cc_params.alpha_ref = vec->dsa->alpha_ref; + vec->cc_params.stencil_front.test_mask = + vec->dsa->stencil_front.test_mask; + vec->cc_params.stencil_front.write_mask = + vec->dsa->stencil_front.write_mask; + vec->cc_params.stencil_back.test_mask = + vec->dsa->stencil_back.test_mask; + vec->cc_params.stencil_back.write_mask = + vec->dsa->stencil_back.write_mask; + } vec->dirty |= ILO_DIRTY_DSA; } @@ -990,7 +1303,7 @@ ilo_set_blend_color(struct pipe_context *pipe, { struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector; - vec->blend_color = *state; + memcpy(vec->cc_params.blend_rgba, state->color, sizeof(state->color)); vec->dirty |= ILO_DIRTY_BLEND_COLOR; } @@ -1007,6 +1320,9 @@ ilo_set_stencil_ref(struct pipe_context *pipe, vec->stencil_ref = *state; + vec->cc_params.stencil_front.test_ref = state->ref_value[0]; + vec->cc_params.stencil_back.test_ref = state->ref_value[1]; + 
vec->dirty |= ILO_DIRTY_STENCIL_REF; } diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 8f6cce2b53a..39d0d7eac8b 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -29,6 +29,7 @@ #define ILO_STATE_H #include "core/ilo_state_3d.h" +#include "core/ilo_state_cc.h" #include "core/ilo_state_raster.h" #include "core/ilo_state_sampler.h" #include "core/ilo_state_surface.h" @@ -191,6 +192,32 @@ struct ilo_viewport_state { uint32_t vp_data[20 * ILO_MAX_VIEWPORTS]; }; +struct ilo_dsa_state { + struct ilo_state_cc_depth_info depth; + + struct ilo_state_cc_stencil_info stencil; + struct { + uint8_t test_mask; + uint8_t write_mask; + } stencil_front, stencil_back; + + bool alpha_test; + float alpha_ref; + enum gen_compare_function alpha_func; +}; + +struct ilo_blend_state { + struct ilo_state_cc_blend_rt_info rt[PIPE_MAX_COLOR_BUFS]; + struct ilo_state_cc_blend_rt_info dummy_rt; + bool dual_blend; + + /* these are invalid until finalize_blend() */ + struct ilo_state_cc_blend_rt_info effective_rt[PIPE_MAX_COLOR_BUFS]; + struct ilo_state_cc_info info; + struct ilo_state_cc cc; + bool alpha_may_kill; +}; + struct ilo_global_binding_cso { struct pipe_resource *resource; uint32_t *handle; @@ -240,10 +267,11 @@ struct ilo_state_vector { struct ilo_shader_state *fs; - const struct ilo_dsa_state *dsa; + struct ilo_state_cc_params_info cc_params; struct pipe_stencil_ref stencil_ref; - const struct ilo_blend_state *blend; - struct pipe_blend_color blend_color; + const struct ilo_dsa_state *dsa; + struct ilo_blend_state *blend; + struct ilo_fb_state fb; /* shader resources */ From eaf2c738991d43ec8e7b36bed05727deaf8151b6 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 29 May 2015 15:25:13 +0800 Subject: [PATCH 626/834] ilo: embed ilo_state_sol in ilo_shader --- .../drivers/ilo/core/ilo_builder_3d_top.h | 135 +++--------------- src/gallium/drivers/ilo/ilo_blitter.h | 2 + 
.../drivers/ilo/ilo_blitter_rectlist.c | 2 + src/gallium/drivers/ilo/ilo_render_gen7.c | 45 +++--- src/gallium/drivers/ilo/ilo_shader.c | 108 +++++++++++--- src/gallium/drivers/ilo/ilo_shader.h | 4 + src/gallium/drivers/ilo/ilo_state.h | 1 + .../drivers/ilo/shader/ilo_shader_internal.h | 6 +- 8 files changed, 153 insertions(+), 150 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index d5a4c778a87..3a2522186be 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -37,6 +37,7 @@ #include "ilo_dev.h" #include "ilo_state_3d.h" #include "ilo_state_sampler.h" +#include "ilo_state_sol.h" #include "ilo_builder.h" static inline void @@ -1013,131 +1014,41 @@ gen7_disable_3DSTATE_GS(struct ilo_builder *builder) static inline void gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder, - int render_stream, - bool render_disable, - int vertex_attrib_count, - const int *buf_strides) + const struct ilo_state_sol *sol) { const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 
5 : 3; uint32_t *dw; - int buf_mask; ILO_DEV_ASSERT(builder->dev, 7, 8); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (cmd_len - 2); - - dw[1] = render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT; - if (render_disable) - dw[1] |= GEN7_SO_DW1_RENDER_DISABLE; - - if (buf_strides) { - buf_mask = ((bool) buf_strides[3]) << 3 | - ((bool) buf_strides[2]) << 2 | - ((bool) buf_strides[1]) << 1 | - ((bool) buf_strides[0]); - if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { - dw[3] = buf_strides[1] << 16 | buf_strides[0]; - dw[4] = buf_strides[3] << 16 | buf_strides[1]; - } - } else { - buf_mask = 0; - } - - if (buf_mask) { - int read_len; - - dw[1] |= GEN7_SO_DW1_SO_ENABLE | - GEN7_SO_DW1_STATISTICS; - /* API_OPENGL */ - if (true) - dw[1] |= GEN7_REORDER_TRAILING << GEN7_SO_DW1_REORDER_MODE__SHIFT; - if (ilo_dev_gen(builder->dev) < ILO_GEN(8)) - dw[1] |= buf_mask << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT; - - read_len = (vertex_attrib_count + 1) / 2; - if (!read_len) - read_len = 1; - - dw[2] = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT | - (read_len - 1) << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT | - 0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT | - (read_len - 1) << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT | - 0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT | - (read_len - 1) << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT | - 0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT | - (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT; - } else { - dw[2] = 0; + /* see sol_set_gen7_3DSTATE_STREAMOUT() */ + dw[1] = sol->so[0]; + dw[2] = sol->so[1]; + if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { + dw[3] = sol->so[2]; + dw[4] = sol->so[3]; } } static inline void gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder, - const struct pipe_stream_output_info *so_info) + const struct ilo_state_sol *sol) { /* * Note that "DWord Length" has 9 bits for this command and the type of * cmd_len cannot be uint8_t. 
*/ uint16_t cmd_len; - struct { - int buf_selects; - int decl_count; - uint16_t decls[128]; - } streams[4]; - unsigned buf_offsets[PIPE_MAX_SO_BUFFERS]; - int hw_decl_count, i; + int cmd_decl_count; uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 8); - memset(streams, 0, sizeof(streams)); - memset(buf_offsets, 0, sizeof(buf_offsets)); - - for (i = 0; i < so_info->num_outputs; i++) { - unsigned decl, st, buf, reg, mask; - - st = so_info->output[i].stream; - buf = so_info->output[i].output_buffer; - - /* pad with holes */ - while (buf_offsets[buf] < so_info->output[i].dst_offset) { - int num_dwords; - - num_dwords = so_info->output[i].dst_offset - buf_offsets[buf]; - if (num_dwords > 4) - num_dwords = 4; - - decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | - GEN7_SO_DECL_HOLE_FLAG | - ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; - - assert(streams[st].decl_count < Elements(streams[st].decls)); - streams[st].decls[streams[st].decl_count++] = decl; - buf_offsets[buf] += num_dwords; - } - assert(buf_offsets[buf] == so_info->output[i].dst_offset); - - reg = so_info->output[i].register_index; - mask = ((1 << so_info->output[i].num_components) - 1) << - so_info->output[i].start_component; - - decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | - reg << GEN7_SO_DECL_REG_INDEX__SHIFT | - mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; - - assert(streams[st].decl_count < Elements(streams[st].decls)); - - streams[st].buf_selects |= 1 << buf; - streams[st].decls[streams[st].decl_count++] = decl; - buf_offsets[buf] += so_info->output[i].num_components; - } - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) { - hw_decl_count = MAX4(streams[0].decl_count, streams[1].decl_count, - streams[2].decl_count, streams[3].decl_count); + cmd_decl_count = sol->decl_count; } else { /* * From the Ivy Bridge PRM, volume 2 part 1, page 201: @@ -1146,28 +1057,22 @@ gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder, * whenever this command is issued. 
The "Num Entries [n]" fields * still contain the actual numbers of valid decls." */ - hw_decl_count = 128; + cmd_decl_count = 128; } - cmd_len = 3 + 2 * hw_decl_count; + cmd_len = 3 + 2 * cmd_decl_count; ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2); - dw[1] = streams[3].buf_selects << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT | - streams[2].buf_selects << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT | - streams[1].buf_selects << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT | - streams[0].buf_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT; - dw[2] = streams[3].decl_count << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT | - streams[2].decl_count << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT | - streams[1].decl_count << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT | - streams[0].decl_count << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT; - dw += 3; + /* see sol_set_gen7_3DSTATE_SO_DECL_LIST() */ + dw[1] = sol->so[4]; + dw[2] = sol->so[5]; + memcpy(&dw[3], sol->decl, sizeof(sol->decl[0]) * sol->decl_count); - for (i = 0; i < hw_decl_count; i++) { - dw[0] = streams[1].decls[i] << 16 | streams[0].decls[i]; - dw[1] = streams[3].decls[i] << 16 | streams[2].decls[i]; - dw += 2; + if (sol->decl_count < cmd_decl_count) { + memset(&dw[3 + 2 * sol->decl_count], 0, sizeof(sol->decl[0]) * + (cmd_decl_count - sol->decl_count)); } } diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index 072f0f7f7fc..3d02063f809 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -61,6 +61,8 @@ struct ilo_blitter { struct ilo_ve_state ve; struct pipe_draw_info draw; + struct ilo_state_sol sol; + struct ilo_state_viewport vp; uint32_t vp_data[20]; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 9d431956314..b2b839cbb41 100644 --- 
a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -64,6 +64,8 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter) blitter->draw.mode = ILO_PRIM_RECTANGLES; blitter->draw.count = 3; + ilo_state_sol_init_disabled(&blitter->sol, blitter->ilo->dev, false); + /** * From the Haswell PRM, volume 7, page 615: * diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 0931a771876..95884a0d51d 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -420,7 +420,7 @@ gen7_draw_sol(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { - const struct pipe_stream_output_info *so_info; + const struct ilo_state_sol *sol; const struct ilo_shader_state *shader; bool dirty_sh = false; @@ -433,13 +433,16 @@ gen7_draw_sol(struct ilo_render *r, dirty_sh = DIRTY(VS); } - so_info = ilo_shader_get_kernel_so_info(shader); + sol = ilo_shader_get_kernel_sol(shader); /* 3DSTATE_SO_BUFFER */ if ((DIRTY(SO) || dirty_sh || r->batch_bo_changed) && vec->so.enabled) { + const struct pipe_stream_output_info *so_info; int i; + so_info = ilo_shader_get_kernel_so_info(shader); + for (i = 0; i < vec->so.count; i++) { const int stride = so_info->stride[i] * 4; /* in bytes */ @@ -452,22 +455,30 @@ gen7_draw_sol(struct ilo_render *r, /* 3DSTATE_SO_DECL_LIST */ if (dirty_sh && vec->so.enabled) - gen7_3DSTATE_SO_DECL_LIST(r->builder, so_info); + gen7_3DSTATE_SO_DECL_LIST(r->builder, sol); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 196-197: + * + * "Anytime the SOL unit MMIO registers or non-pipeline state are + * written, the SOL unit needs to receive a pipeline state update with + * SOL unit dirty state for information programmed in MMIO/NP to get + * loaded into the SOL unit. 
+ * + * The SOL unit incorrectly double buffers MMIO/NP registers and only + * moves them into the design for usage when control topology is + * received with the SOL unit dirty state. + * + * If the state does not change, need to resend the same state. + * + * Because of corruption, software must flush the whole fixed function + * pipeline when 3DSTATE_STREAMOUT changes state." + * + * The first and fourth paragraphs are gone on Gen7.5+. + */ /* 3DSTATE_STREAMOUT */ - if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) { - const int output_count = ilo_shader_get_kernel_param(shader, - ILO_KERNEL_OUTPUT_COUNT); - int buf_strides[4] = { 0, 0, 0, 0 }; - int i; - - for (i = 0; i < vec->so.count; i++) - buf_strides[i] = so_info->stride[i] * 4; - - gen7_3DSTATE_STREAMOUT(r->builder, 0, - vec->rasterizer->state.rasterizer_discard, - output_count, buf_strides); - } + gen7_3DSTATE_STREAMOUT(r->builder, sol); } static void @@ -717,7 +728,7 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r, gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); gen7_disable_3DSTATE_GS(r->builder); - gen7_3DSTATE_STREAMOUT(r->builder, 0, false, 0x0, 0); + gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol); gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs); diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index af467064fe4..e9eb042ebc8 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -557,39 +557,103 @@ ilo_shader_state_search_variant(struct ilo_shader_state *state, } static void -copy_so_info(struct ilo_shader *sh, - const struct pipe_stream_output_info *so_info) +init_sol(struct ilo_shader *kernel, + const struct ilo_dev *dev, + const struct pipe_stream_output_info *so_info, + bool rasterizer_discard) { - unsigned i, attr; + struct ilo_state_sol_decl_info decls[4][PIPE_MAX_SO_OUTPUTS]; + unsigned buf_offsets[PIPE_MAX_SO_BUFFERS]; + struct ilo_state_sol_info info; + unsigned i; - if (!so_info->num_outputs) + if 
(!so_info->num_outputs) { + ilo_state_sol_init_disabled(&kernel->sol, dev, rasterizer_discard); return; + } - sh->so_info = *so_info; + memset(&info, 0, sizeof(info)); + info.data = kernel->sol_data; + info.data_size = sizeof(kernel->sol_data); + info.sol_enable = true; + info.stats_enable = true; + info.tristrip_reorder = GEN7_REORDER_TRAILING; + info.render_disable = rasterizer_discard; + info.render_stream = 0; + for (i = 0; i < 4; i++) { + info.buffer_strides[i] = so_info->stride[i] * 4; + + info.streams[i].cv_vue_attr_count = kernel->out.count; + info.streams[i].decls = decls[i]; + } + + memset(decls, 0, sizeof(decls)); + memset(buf_offsets, 0, sizeof(buf_offsets)); for (i = 0; i < so_info->num_outputs; i++) { + const unsigned stream = so_info->output[i].stream; + const unsigned buffer = so_info->output[i].output_buffer; + struct ilo_state_sol_decl_info *decl; + unsigned attr; + /* figure out which attribute is sourced */ - for (attr = 0; attr < sh->out.count; attr++) { - const int reg_idx = sh->out.register_indices[attr]; + for (attr = 0; attr < kernel->out.count; attr++) { + const int reg_idx = kernel->out.register_indices[attr]; if (reg_idx == so_info->output[i].register_index) break; } - - if (attr < sh->out.count) { - sh->so_info.output[i].register_index = attr; - } - else { + if (attr >= kernel->out.count) { assert(!"stream output an undefined register"); - sh->so_info.output[i].register_index = 0; + attr = 0; } + if (info.streams[stream].vue_read_count < attr + 1) + info.streams[stream].vue_read_count = attr + 1; + + /* pad with holes first */ + while (buf_offsets[buffer] < so_info->output[i].dst_offset) { + int num_dwords; + + num_dwords = so_info->output[i].dst_offset - buf_offsets[buffer]; + if (num_dwords > 4) + num_dwords = 4; + + assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream])); + decl = &decls[stream][info.streams[stream].decl_count]; + + decl->attr = 0; + decl->is_hole = true; + decl->component_base = 0; + 
decl->component_count = num_dwords; + decl->buffer = buffer; + + info.streams[stream].decl_count++; + buf_offsets[buffer] += num_dwords; + } + assert(buf_offsets[buffer] == so_info->output[i].dst_offset); + + assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream])); + decl = &decls[stream][info.streams[stream].decl_count]; + + decl->attr = attr; + decl->is_hole = false; /* PSIZE is at W channel */ - if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) { + if (kernel->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) { assert(so_info->output[i].start_component == 0); assert(so_info->output[i].num_components == 1); - sh->so_info.output[i].start_component = 3; + decl->component_base = 3; + decl->component_count = 1; + } else { + decl->component_base = so_info->output[i].start_component; + decl->component_count = so_info->output[i].num_components; } + decl->buffer = buffer; + + info.streams[stream].decl_count++; + buf_offsets[buffer] += so_info->output[i].num_components; } + + ilo_state_sol_init(&kernel->sol, dev, &info); } /** @@ -599,17 +663,20 @@ static struct ilo_shader * ilo_shader_state_add_variant(struct ilo_shader_state *state, const struct ilo_shader_variant *variant) { + bool rasterizer_discard = false; struct ilo_shader *sh; switch (state->info.type) { case PIPE_SHADER_VERTEX: sh = ilo_shader_compile_vs(state, variant); + rasterizer_discard = variant->u.vs.rasterizer_discard; break; case PIPE_SHADER_FRAGMENT: sh = ilo_shader_compile_fs(state, variant); break; case PIPE_SHADER_GEOMETRY: sh = ilo_shader_compile_gs(state, variant); + rasterizer_discard = variant->u.gs.rasterizer_discard; break; case PIPE_SHADER_COMPUTE: sh = ilo_shader_compile_cs(state, variant); @@ -625,7 +692,8 @@ ilo_shader_state_add_variant(struct ilo_shader_state *state, sh->variant = *variant; - copy_so_info(sh, &state->info.stream_output); + init_sol(sh, state->info.dev, &state->info.stream_output, + rasterizer_discard); ilo_shader_state_add_shader(state, sh); @@ -1163,12 
+1231,18 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader) */ const struct pipe_stream_output_info * ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader) +{ + return &shader->info.stream_output; +} + +const struct ilo_state_sol * +ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader) { const struct ilo_shader *kernel = shader->shader; assert(kernel); - return &kernel->so_info; + return &kernel->sol; } /** diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h index 8a359001bb8..ddcd6f0356f 100644 --- a/src/gallium/drivers/ilo/ilo_shader.h +++ b/src/gallium/drivers/ilo/ilo_shader.h @@ -96,6 +96,7 @@ struct ilo_rasterizer_state; struct ilo_shader_cache; struct ilo_shader_state; struct ilo_shader_cso; +struct ilo_state_sol; struct ilo_state_vector; struct ilo_shader_cache * @@ -168,6 +169,9 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader); const struct pipe_stream_output_info * ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader); +const struct ilo_state_sol * +ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader); + const struct ilo_kernel_routing * ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader); diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 39d0d7eac8b..ae4639fe3f7 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -32,6 +32,7 @@ #include "core/ilo_state_cc.h" #include "core/ilo_state_raster.h" #include "core/ilo_state_sampler.h" +#include "core/ilo_state_sol.h" #include "core/ilo_state_surface.h" #include "core/ilo_state_viewport.h" #include "core/ilo_state_zs.h" diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h index d2dc2f5b5b4..603d13e5766 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h +++ 
b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h @@ -28,6 +28,8 @@ #ifndef ILO_SHADER_INTERNAL_H #define ILO_SHADER_INTERNAL_H +#include "core/ilo_state_sol.h" + #include "ilo_common.h" #include "ilo_state.h" #include "ilo_shader.h" @@ -111,7 +113,9 @@ struct ilo_shader { bool stream_output; int svbi_post_inc; - struct pipe_stream_output_info so_info; + + uint32_t sol_data[PIPE_MAX_SO_OUTPUTS][2]; + struct ilo_state_sol sol; /* for VS stream output / rasterizer discard */ int gs_offsets[3]; From 9bfa987fb00a4e0471bcdb4948c8f416d7c5b562 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 28 May 2015 13:43:56 +0800 Subject: [PATCH 627/834] ilo: embed ilo_state_urb in ilo_state_vector --- .../drivers/ilo/core/ilo_builder_3d_top.h | 309 +++++------------- src/gallium/drivers/ilo/ilo_blitter.h | 2 + .../drivers/ilo/ilo_blitter_rectlist.c | 3 + src/gallium/drivers/ilo/ilo_render.c | 6 + src/gallium/drivers/ilo/ilo_render_gen.h | 2 + src/gallium/drivers/ilo/ilo_render_gen6.c | 65 +--- src/gallium/drivers/ilo/ilo_render_gen7.c | 98 ++---- src/gallium/drivers/ilo/ilo_state.c | 55 ++++ src/gallium/drivers/ilo/ilo_state.h | 3 + 9 files changed, 189 insertions(+), 354 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 3a2522186be..2a475cb15ca 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -38,290 +38,153 @@ #include "ilo_state_3d.h" #include "ilo_state_sampler.h" #include "ilo_state_sol.h" +#include "ilo_state_urb.h" #include "ilo_builder.h" static inline void gen6_3DSTATE_URB(struct ilo_builder *builder, - int vs_total_size, int gs_total_size, - int vs_entry_size, int gs_entry_size) + const struct ilo_state_urb *urb) { const uint8_t cmd_len = 3; - const int row_size = 128; /* 1024 bits */ - int vs_alloc_size, gs_alloc_size; - int vs_num_entries, gs_num_entries; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 6, 6); - 
- /* in 1024-bit URB rows */ - vs_alloc_size = (vs_entry_size + row_size - 1) / row_size; - gs_alloc_size = (gs_entry_size + row_size - 1) / row_size; - - /* the valid range is [1, 5] */ - if (!vs_alloc_size) - vs_alloc_size = 1; - if (!gs_alloc_size) - gs_alloc_size = 1; - assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); - - /* the valid range is [24, 256] in multiples of 4 */ - vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3; - if (vs_num_entries > 256) - vs_num_entries = 256; - assert(vs_num_entries >= 24); - - /* the valid range is [0, 256] in multiples of 4 */ - gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3; - if (gs_num_entries > 256) - gs_num_entries = 256; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2); - dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT | - vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT; - dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT | - (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT; -} - -static inline void -gen7_3dstate_push_constant_alloc(struct ilo_builder *builder, - int subop, int offset, int size) -{ - const uint32_t cmd = GEN6_RENDER_TYPE_RENDER | - GEN6_RENDER_SUBTYPE_3D | - subop; - const uint8_t cmd_len = 2; - const int slice_count = ((ilo_dev_gen(builder->dev) == ILO_GEN(7.5) && - builder->dev->gt == 3) || - ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 
2 : 1; - uint32_t *dw; - int end; - - ILO_DEV_ASSERT(builder->dev, 7, 8); - - /* VS, HS, DS, GS, and PS variants */ - assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS && - subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 68: - * - * "(A table that says the maximum size of each constant buffer is - * 16KB") - * - * From the Ivy Bridge PRM, volume 2 part 1, page 115: - * - * "The sum of the Constant Buffer Offset and the Constant Buffer Size - * may not exceed the maximum value of the Constant Buffer Size." - * - * Thus, the valid range of buffer end is [0KB, 16KB]. - */ - end = (offset + size) / 1024; - if (end > 16 * slice_count) { - assert(!"invalid constant buffer end"); - end = 16 * slice_count; - } - - /* the valid range of buffer offset is [0KB, 15KB] */ - offset = (offset + 1023) / 1024; - if (offset > 15 * slice_count) { - assert(!"invalid constant buffer offset"); - offset = 15 * slice_count; - } - - if (offset > end) { - assert(!size); - offset = end; - } - - /* the valid range of buffer size is [0KB, 15KB] */ - size = end - offset; - if (size > 15 * slice_count) { - assert(!"invalid constant buffer size"); - size = 15 * slice_count; - } - - assert(offset % slice_count == 0 && size % slice_count == 0); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - dw[0] = cmd | (cmd_len - 2); - dw[1] = offset << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT | - size; + /* see urb_set_gen6_3DSTATE_URB() */ + dw[1] = urb->urb[0]; + dw[2] = urb->urb[1]; } static inline void gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder, - int offset, int size) + const struct ilo_state_urb *urb) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_VS) | + (cmd_len - 2); + /* 
see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->pcb[0]; } static inline void gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder, - int offset, int size) + const struct ilo_state_urb *urb) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_HS) | + (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->pcb[1]; } static inline void gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder, - int offset, int size) + const struct ilo_state_urb *urb) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_DS) | + (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->pcb[2]; } static inline void gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder, - int offset, int size) + const struct ilo_state_urb *urb) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_GS) | + (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->pcb[3]; } static inline void gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder, - int offset, int size) + const struct ilo_state_urb *urb) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size); -} - -static inline void -gen7_3dstate_urb(struct ilo_builder *builder, - int subop, int offset, int size, - 
int entry_size) -{ - const uint32_t cmd = GEN6_RENDER_TYPE_RENDER | - GEN6_RENDER_SUBTYPE_3D | - subop; const uint8_t cmd_len = 2; - const int row_size = 64; /* 512 bits */ - int alloc_size, num_entries, min_entries, max_entries; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 7, 8); - - /* VS, HS, DS, and GS variants */ - assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS && - subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS); - - /* in multiples of 8KB */ - assert(offset % 8192 == 0); - offset /= 8192; - - /* in multiple of 512-bit rows */ - alloc_size = (entry_size + row_size - 1) / row_size; - if (!alloc_size) - alloc_size = 1; - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 34: - * - * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may - * cause performance to decrease due to banking in the URB. Element - * sizes of 16 to 20 should be programmed with six 512-bit URB rows." - */ - if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5) - alloc_size = 6; - - /* in multiples of 8 */ - num_entries = (size / row_size / alloc_size) & ~7; - - switch (subop) { - case GEN7_RENDER_OPCODE_3DSTATE_URB_VS: - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(8): - max_entries = 2560; - min_entries = 64; - break; - case ILO_GEN(7.5): - max_entries = (builder->dev->gt >= 2) ? 1664 : 640; - min_entries = (builder->dev->gt >= 2) ? 64 : 32; - break; - case ILO_GEN(7): - default: - max_entries = (builder->dev->gt == 2) ? 704 : 512; - min_entries = 32; - break; - } - - assert(num_entries >= min_entries); - if (num_entries > max_entries) - num_entries = max_entries; - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_HS: - max_entries = (builder->dev->gt == 2) ? 
64 : 32; - if (num_entries > max_entries) - num_entries = max_entries; - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_DS: - if (num_entries) - assert(num_entries >= 138); - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_GS: - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(8): - max_entries = 960; - break; - case ILO_GEN(7.5): - max_entries = (builder->dev->gt >= 2) ? 640 : 256; - break; - case ILO_GEN(7): - default: - max_entries = (builder->dev->gt == 2) ? 320 : 192; - break; - } - - if (num_entries > max_entries) - num_entries = max_entries; - break; - default: - break; - } - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = cmd | (cmd_len - 2); - dw[1] = offset << GEN7_URB_DW1_OFFSET__SHIFT | - (alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT | - num_entries; + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_PS) | + (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->pcb[4]; } static inline void gen7_3DSTATE_URB_VS(struct ilo_builder *builder, - int offset, int size, int entry_size) + const struct ilo_state_urb *urb) { - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS, - offset, size, entry_size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_VS) | (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->urb[0]; } static inline void gen7_3DSTATE_URB_HS(struct ilo_builder *builder, - int offset, int size, int entry_size) + const struct ilo_state_urb *urb) { - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS, - offset, size, entry_size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_HS) | (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->urb[1]; } static inline void gen7_3DSTATE_URB_DS(struct ilo_builder *builder, - int offset, int 
size, int entry_size) + const struct ilo_state_urb *urb) { - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS, - offset, size, entry_size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_DS) | (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->urb[2]; } static inline void gen7_3DSTATE_URB_GS(struct ilo_builder *builder, - int offset, int size, int entry_size) + const struct ilo_state_urb *urb) { - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS, - offset, size, entry_size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_GS) | (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->urb[3]; } static inline void diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index 3d02063f809..c257c6048e1 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -70,6 +70,8 @@ struct ilo_blitter { uint32_t depth_clear_value; + struct ilo_state_urb urb; + struct { struct ilo_surface_cso dst; unsigned width, height; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index b2b839cbb41..b106e79958a 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -78,6 +78,9 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter) ilo_state_viewport_init_for_rectlist(&blitter->vp, blitter->ilo->dev, blitter->vp_data, sizeof(blitter->vp_data)); + ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev, + blitter->ve.count + blitter->ve.prepend_nosrc_cso); + blitter->initialized = true; return true; diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c index 6935138f8d9..0fd19e3fdff 
100644 --- a/src/gallium/drivers/ilo/ilo_render.c +++ b/src/gallium/drivers/ilo/ilo_render.c @@ -448,6 +448,8 @@ draw_session_prepare(struct ilo_render *render, session->prim_changed = true; session->primitive_restart_changed = true; + ilo_state_urb_full_delta(&vec->urb, render->dev, &session->urb_delta); + ilo_state_raster_full_delta(&vec->rasterizer->rs, render->dev, &session->rs_delta); @@ -462,6 +464,9 @@ draw_session_prepare(struct ilo_render *render, session->primitive_restart_changed = (render->state.primitive_restart != vec->draw->primitive_restart); + ilo_state_urb_get_delta(&vec->urb, render->dev, + &render->state.urb, &session->urb_delta); + if (vec->dirty & ILO_DIRTY_RASTERIZER) { ilo_state_raster_get_delta(&vec->rasterizer->rs, render->dev, &render->state.rs, &session->rs_delta); @@ -493,6 +498,7 @@ draw_session_end(struct ilo_render *render, render->state.reduced_prim = session->reduced_prim; render->state.primitive_restart = vec->draw->primitive_restart; + render->state.urb = vec->urb; render->state.rs = vec->rasterizer->rs; render->state.cc = vec->blend->cc; } diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index cc6f77d9750..74c13801767 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -90,6 +90,7 @@ struct ilo_render { int reduced_prim; int so_max_vertices; + struct ilo_state_urb urb; struct ilo_state_raster rs; struct ilo_state_cc cc; @@ -148,6 +149,7 @@ struct ilo_render_draw_session { bool prim_changed; bool primitive_restart_changed; + struct ilo_state_urb_delta urb_delta; struct ilo_state_raster_delta rs_delta; struct ilo_state_viewport_delta vp_delta; struct ilo_state_cc_delta cc_delta; diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index ff0bf2fb820..9d199955be3 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -329,64 +329,19 @@ 
gen6_draw_common_urb(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { + const bool gs_active = (vec->gs || (vec->vs && + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))); + /* 3DSTATE_URB */ - if (DIRTY(VE) || DIRTY(VS) || DIRTY(GS)) { - const bool gs_active = (vec->gs || (vec->vs && - ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))); - int vs_entry_size, gs_entry_size; - int vs_total_size, gs_total_size; - - vs_entry_size = (vec->vs) ? - ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0; - - /* - * As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS - * share VUE handles. The VUE allocation size must be large enough to - * store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs. - * - * I am not sure if the PRM explicitly states that VF and VS share VUE - * handles. But here is a citation that implies so: - * - * From the Sandy Bridge PRM, volume 2 part 1, page 44: - * - * "Once a FF stage that spawn threads has sufficient input to - * initiate a thread, it must guarantee that it is safe to request - * the thread initiation. For all these FF stages, this check is - * based on : - * - * - The availability of output URB entries: - * - VS: As the input URB entries are overwritten with the - * VS-generated output data, output URB availability isn't a - * factor." - */ - if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso) - vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso; - - gs_entry_size = (vec->gs) ? - ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) : - (gs_active) ? 
vs_entry_size : 0; - - /* in bytes */ - vs_entry_size *= sizeof(float) * 4; - gs_entry_size *= sizeof(float) * 4; - vs_total_size = r->dev->urb_size; - - if (gs_active) { - vs_total_size /= 2; - gs_total_size = vs_total_size; - } - else { - gs_total_size = 0; - } - - gen6_3DSTATE_URB(r->builder, vs_total_size, gs_total_size, - vs_entry_size, gs_entry_size); + if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS | + ILO_STATE_URB_3DSTATE_URB_GS)) { + gen6_3DSTATE_URB(r->builder, &vec->urb); if (r->state.gs.active && !gs_active) gen6_wa_post_3dstate_urb_no_gs(r); - - r->state.gs.active = gs_active; } + + r->state.gs.active = gs_active; } static void @@ -920,9 +875,7 @@ ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r, gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve); - gen6_3DSTATE_URB(r->builder, r->dev->urb_size, 0, - (blitter->ve.count + blitter->ve.prepend_nosrc_cso) * 4 * sizeof(float), - 0); + gen6_3DSTATE_URB(r->builder, &blitter->urb); if (r->state.gs.active) { gen6_wa_post_3dstate_urb_no_gs(r); diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 95884a0d51d..f5c1a82d671 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -200,40 +200,17 @@ gen7_draw_common_urb(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_URB_{VS,GS,HS,DS} */ - if (DIRTY(VE) || DIRTY(VS)) { - /* the first 16KB are reserved for VS and PS PCBs */ - const int offset = - (ilo_dev_gen(r->dev) >= ILO_GEN(8)) || - (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? - 32768 : 16384; - int vs_entry_size, vs_total_size; - - vs_entry_size = (vec->vs) ? 
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0; - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 35: - * - * "Programming Restriction: As the VS URB entry serves as both the - * per-vertex input and output of the VS shader, the VS URB - * Allocation Size must be sized to the maximum of the vertex input - * and output structures." - */ - if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso) - vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso; - - vs_entry_size *= sizeof(float) * 4; - vs_total_size = r->dev->urb_size - offset; - + if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS | + ILO_STATE_URB_3DSTATE_URB_HS | + ILO_STATE_URB_3DSTATE_URB_DS | + ILO_STATE_URB_3DSTATE_URB_GS)) { if (ilo_dev_gen(r->dev) == ILO_GEN(7)) gen7_wa_pre_vs(r); - gen7_3DSTATE_URB_VS(r->builder, - offset, vs_total_size, vs_entry_size); - - gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0); - gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0); - gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0); + gen7_3DSTATE_URB_VS(r->builder, &vec->urb); + gen7_3DSTATE_URB_GS(r->builder, &vec->urb); + gen7_3DSTATE_URB_HS(r->builder, &vec->urb); + gen7_3DSTATE_URB_DS(r->builder, &vec->urb); } } @@ -243,22 +220,15 @@ gen7_draw_common_pcb_alloc(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */ - if (r->hw_ctx_changed) { - /* - * Push constant buffers are only allowed to take up at most the first - * 16KB of the URB. Split the space evenly for VS and FS. - */ - const int max_size = - (ilo_dev_gen(r->dev) >= ILO_GEN(8)) || - (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? 
- 32768 : 16384; - const int size = max_size / 2; - int offset = 0; - - gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size); - offset += size; - - gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size); + if (session->urb_delta.dirty & + (ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS)) { + gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &vec->urb); + gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(r->builder, &vec->urb); + gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &vec->urb); if (ilo_dev_gen(r->dev) == ILO_GEN(7)) gen7_wa_post_3dstate_push_constant_alloc_ps(r); @@ -671,21 +641,8 @@ static void gen7_rectlist_pcb_alloc(struct ilo_render *r, const struct ilo_blitter *blitter) { - /* - * Push constant buffers are only allowed to take up at most the first - * 16KB of the URB. Split the space evenly for VS and FS. - */ - const int max_size = - (ilo_dev_gen(r->dev) >= ILO_GEN(8)) || - (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? - 32768 : 16384; - const int size = max_size / 2; - int offset = 0; - - gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size); - offset += size; - - gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size); + gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &blitter->urb); + gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &blitter->urb); if (ilo_dev_gen(r->dev) == ILO_GEN(7)) gen7_wa_post_3dstate_push_constant_alloc_ps(r); @@ -695,19 +652,10 @@ static void gen7_rectlist_urb(struct ilo_render *r, const struct ilo_blitter *blitter) { - /* the first 16KB are reserved for VS and PS PCBs */ - const int offset = - (ilo_dev_gen(r->dev) >= ILO_GEN(8)) || - (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? 
- 32768 : 16384; - - gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset, - (blitter->ve.count + blitter->ve.prepend_nosrc_cso) * - 4 * sizeof(float)); - - gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0); - gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0); - gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0); + gen7_3DSTATE_URB_VS(r->builder, &blitter->urb); + gen7_3DSTATE_URB_GS(r->builder, &blitter->urb); + gen7_3DSTATE_URB_HS(r->builder, &blitter->urb); + gen7_3DSTATE_URB_DS(r->builder, &blitter->urb); } static void diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 0a568bfbdc2..896402c9109 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -477,6 +477,55 @@ finalize_vertex_elements(struct ilo_context *ilo) } } +static void +finalize_urb(struct ilo_context *ilo) +{ + const uint16_t attr_size = sizeof(uint32_t) * 4; + const struct ilo_dev *dev = ilo->dev; + struct ilo_state_vector *vec = &ilo->state_vector; + struct ilo_state_urb_info info; + + if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS | + ILO_DIRTY_GS | ILO_DIRTY_FS))) + return; + + memset(&info, 0, sizeof(info)); + + info.ve_entry_size = attr_size * + (vec->ve->count + vec->ve->prepend_nosrc_cso); + + if (vec->vs) { + info.vs_const_data = (bool) + (ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_PCB_CBUF0_SIZE) + + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_PCB_UCP_SIZE)); + info.vs_entry_size = attr_size * + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT); + } + + if (vec->gs) { + info.gs_const_data = (bool) + ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_PCB_CBUF0_SIZE); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 189: + * + * "All outputs of a GS thread will be stored in the single GS + * thread output URB entry." 
+ * + * TODO + */ + info.gs_entry_size = attr_size * + ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT); + } + + if (vec->fs) { + info.ps_const_data = (bool) + ilo_shader_get_kernel_param(vec->fs, ILO_KERNEL_PCB_CBUF0_SIZE); + } + + ilo_state_urb_set_info(&vec->urb, dev, &info); +} + static void finalize_viewport(struct ilo_context *ilo) { @@ -680,6 +729,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo, finalize_index_buffer(ilo); finalize_vertex_elements(ilo); + finalize_urb(ilo); finalize_rasterizer(ilo); finalize_viewport(ilo); finalize_blend(ilo); @@ -2065,6 +2115,8 @@ void ilo_state_vector_init(const struct ilo_dev *dev, struct ilo_state_vector *vec) { + struct ilo_state_urb_info urb_info; + vec->sample_mask = ~0u; ilo_state_viewport_init_data_only(&vec->viewport.vp, dev, @@ -2079,6 +2131,9 @@ ilo_state_vector_init(const struct ilo_dev *dev, ilo_state_sampler_init_disabled(&vec->disabled_sampler, dev); + memset(&urb_info, 0, sizeof(urb_info)); + ilo_state_urb_init(&vec->urb, dev, &urb_info); + util_dynarray_init(&vec->global_binding.bindings); vec->dirty = ILO_DIRTY_ALL; diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index ae4639fe3f7..908585a507e 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -34,6 +34,7 @@ #include "core/ilo_state_sampler.h" #include "core/ilo_state_sol.h" #include "core/ilo_state_surface.h" +#include "core/ilo_state_urb.h" #include "core/ilo_state_viewport.h" #include "core/ilo_state_zs.h" #include "pipe/p_state.h" @@ -275,6 +276,8 @@ struct ilo_state_vector { struct ilo_fb_state fb; + struct ilo_state_urb urb; + /* shader resources */ struct ilo_sampler_state sampler[PIPE_SHADER_TYPES]; struct ilo_view_state view[PIPE_SHADER_TYPES]; From 5a52627c4f9215649b0f244af96512b9aafceaa1 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Tue, 2 Jun 2015 14:57:48 +0800 Subject: [PATCH 628/834] ilo: embed ilo_state_vf in ilo_ve_state --- 
.../drivers/ilo/core/ilo_builder_3d_top.h | 78 +++------ src/gallium/drivers/ilo/core/ilo_state_3d.h | 36 ---- .../drivers/ilo/core/ilo_state_3d_top.c | 159 ------------------ src/gallium/drivers/ilo/ilo_blitter.h | 3 +- .../drivers/ilo/ilo_blitter_rectlist.c | 27 ++- src/gallium/drivers/ilo/ilo_render.c | 6 + src/gallium/drivers/ilo/ilo_render_gen.h | 1 + src/gallium/drivers/ilo/ilo_render_gen6.c | 12 +- src/gallium/drivers/ilo/ilo_render_gen7.c | 2 +- src/gallium/drivers/ilo/ilo_render_gen8.c | 18 +- src/gallium/drivers/ilo/ilo_state.c | 122 ++++++++------ src/gallium/drivers/ilo/ilo_state.h | 12 ++ 12 files changed, 146 insertions(+), 330 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 2a475cb15ca..a47c2973480 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -39,6 +39,7 @@ #include "ilo_state_sampler.h" #include "ilo_state_sol.h" #include "ilo_state_urb.h" +#include "ilo_state_vf.h" #include "ilo_builder.h" static inline void @@ -249,10 +250,10 @@ gen6_3d_translate_pipe_prim(unsigned prim) } static inline void -gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder, unsigned pipe_prim) +gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder, + enum gen_3dprim_type topology) { const uint8_t cmd_len = 2; - const int prim = gen6_3d_translate_pipe_prim(pipe_prim); uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); @@ -260,7 +261,7 @@ gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder, unsigned pipe_prim) ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_TOPOLOGY) | (cmd_len - 2); - dw[1] = prim; + dw[1] = topology << GEN8_TOPOLOGY_DW1_TYPE__SHIFT; } static inline void @@ -283,8 +284,7 @@ gen8_3DSTATE_VF_INSTANCING(struct ilo_builder *builder, static inline void gen8_3DSTATE_VF_SGVS(struct ilo_builder *builder, - bool vid_enable, int vid_ve, int vid_comp, - bool iid_enable, 
int iid_ve, int iid_comp) + const struct ilo_state_vf *vf) { const uint8_t cmd_len = 2; uint32_t *dw; @@ -294,25 +294,16 @@ gen8_3DSTATE_VF_SGVS(struct ilo_builder *builder, ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_SGVS) | (cmd_len - 2); - dw[1] = 0; - - if (iid_enable) { - dw[1] |= GEN8_SGVS_DW1_IID_ENABLE | - vid_comp << GEN8_SGVS_DW1_IID_VE_COMP__SHIFT | - vid_ve << GEN8_SGVS_DW1_IID_VE_INDEX__SHIFT; - } - - if (vid_enable) { - dw[1] |= GEN8_SGVS_DW1_VID_ENABLE | - vid_comp << GEN8_SGVS_DW1_VID_VE_COMP__SHIFT | - vid_ve << GEN8_SGVS_DW1_VID_VE_INDEX__SHIFT; - } + /* see vf_params_set_gen8_3DSTATE_VF_SGVS() */ + dw[1] = vf->sgvs[0]; } static inline void gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, - const struct ilo_ve_state *ve, - const struct ilo_vb_state *vb) + const struct ilo_vb_state *vb, + const unsigned *vb_mapping, + const unsigned *instance_divisors, + unsigned vb_count) { uint8_t cmd_len; uint32_t *dw; @@ -325,21 +316,21 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, * * "From 1 to 33 VBs can be specified..." 
*/ - assert(ve->vb_count <= 33); + assert(vb_count <= 33); - if (!ve->vb_count) + if (!vb_count) return; - cmd_len = 1 + 4 * ve->vb_count; + cmd_len = 1 + 4 * vb_count; pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2); dw++; pos++; - for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { - const unsigned instance_divisor = ve->instance_divisors[hw_idx]; - const unsigned pipe_idx = ve->vb_mapping[hw_idx]; + for (hw_idx = 0; hw_idx < vb_count; hw_idx++) { + const unsigned instance_divisor = instance_divisors[hw_idx]; + const unsigned pipe_idx = vb_mapping[hw_idx]; const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx]; dw[0] = hw_idx << GEN6_VB_DW0_INDEX__SHIFT; @@ -428,46 +419,27 @@ gen6_user_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, static inline void gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, - const struct ilo_ve_state *ve) + const struct ilo_state_vf *vf) { uint8_t cmd_len; uint32_t *dw; - unsigned i; ILO_DEV_ASSERT(builder->dev, 6, 8); - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 92: - * - * "At least one VERTEX_ELEMENT_STATE structure must be included." - * - * From the Sandy Bridge PRM, volume 2 part 1, page 93: - * - * "Up to 34 (DevSNB+) vertex elements are supported." 
- */ - assert(ve->count + ve->prepend_nosrc_cso >= 1); - assert(ve->count + ve->prepend_nosrc_cso <= 34); + cmd_len = 1 + 2 * (vf->internal_ve_count + vf->user_ve_count); - STATIC_ASSERT(Elements(ve->cso[0].payload) == 2); - - cmd_len = 1 + 2 * (ve->count + ve->prepend_nosrc_cso); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2); dw++; - if (ve->prepend_nosrc_cso) { - memcpy(dw, ve->nosrc_cso.payload, sizeof(ve->nosrc_cso.payload)); - dw += 2; + /* see vf_set_gen6_3DSTATE_VERTEX_ELEMENTS() */ + if (vf->internal_ve_count) { + memcpy(dw, vf->internal_ve, + sizeof(vf->internal_ve[0]) * vf->internal_ve_count); + dw += 2 * vf->internal_ve_count; } - - for (i = 0; i < ve->count - ve->last_cso_edgeflag; i++) { - memcpy(dw, ve->cso[i].payload, sizeof(ve->cso[i].payload)); - dw += 2; - } - - if (ve->last_cso_edgeflag) - memcpy(dw, ve->edgeflag_cso.payload, sizeof(ve->edgeflag_cso.payload)); + memcpy(dw, vf->user_ve, sizeof(vf->user_ve[0]) * vf->user_ve_count); } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index 45929b2226d..16db93c6603 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -72,27 +72,6 @@ struct ilo_ib_state { int64_t draw_start_offset; }; -struct ilo_ve_cso { - /* VERTEX_ELEMENT_STATE */ - uint32_t payload[2]; -}; - -struct ilo_ve_state { - struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS]; - unsigned count; - - unsigned instance_divisors[PIPE_MAX_ATTRIBS]; - unsigned vb_mapping[PIPE_MAX_ATTRIBS]; - unsigned vb_count; - - /* these are not valid until the state is finalized */ - struct ilo_ve_cso edgeflag_cso; - bool last_cso_edgeflag; - - struct ilo_ve_cso nosrc_cso; - bool prepend_nosrc_cso; -}; - struct ilo_so_state { struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS]; unsigned count; @@ -138,21 +117,6 @@ struct ilo_shader_cso { uint32_t payload[5]; }; 
-void -ilo_gpe_init_ve(const struct ilo_dev *dev, - unsigned num_states, - const struct pipe_vertex_element *states, - struct ilo_ve_state *ve); - -void -ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev, - struct ilo_ve_cso *cso); - -void -ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev, - int comp0, int comp1, int comp2, int comp3, - struct ilo_ve_cso *cso); - void ilo_gpe_init_vs_cso(const struct ilo_dev *dev, const struct ilo_shader_state *vs, diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c index ca58f73783e..feac579f2de 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c @@ -37,165 +37,6 @@ #include "ilo_state_3d.h" #include "../ilo_shader.h" -static void -ve_init_cso(const struct ilo_dev *dev, - const struct pipe_vertex_element *state, - unsigned vb_index, - struct ilo_ve_cso *cso) -{ - int comp[4] = { - GEN6_VFCOMP_STORE_SRC, - GEN6_VFCOMP_STORE_SRC, - GEN6_VFCOMP_STORE_SRC, - GEN6_VFCOMP_STORE_SRC, - }; - int format; - - ILO_DEV_ASSERT(dev, 6, 8); - - switch (util_format_get_nr_components(state->src_format)) { - case 1: comp[1] = GEN6_VFCOMP_STORE_0; - case 2: comp[2] = GEN6_VFCOMP_STORE_0; - case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ? 
- GEN6_VFCOMP_STORE_1_INT : - GEN6_VFCOMP_STORE_1_FP; - } - - format = ilo_format_translate_vertex(dev, state->src_format); - - STATIC_ASSERT(Elements(cso->payload) >= 2); - cso->payload[0] = - vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT | - GEN6_VE_DW0_VALID | - format << GEN6_VE_DW0_FORMAT__SHIFT | - state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT; - - cso->payload[1] = - comp[0] << GEN6_VE_DW1_COMP0__SHIFT | - comp[1] << GEN6_VE_DW1_COMP1__SHIFT | - comp[2] << GEN6_VE_DW1_COMP2__SHIFT | - comp[3] << GEN6_VE_DW1_COMP3__SHIFT; -} - -void -ilo_gpe_init_ve(const struct ilo_dev *dev, - unsigned num_states, - const struct pipe_vertex_element *states, - struct ilo_ve_state *ve) -{ - unsigned i; - - ILO_DEV_ASSERT(dev, 6, 8); - - ve->count = num_states; - ve->vb_count = 0; - - for (i = 0; i < num_states; i++) { - const unsigned pipe_idx = states[i].vertex_buffer_index; - const unsigned instance_divisor = states[i].instance_divisor; - unsigned hw_idx; - - /* - * map the pipe vb to the hardware vb, which has a fixed instance - * divisor - */ - for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { - if (ve->vb_mapping[hw_idx] == pipe_idx && - ve->instance_divisors[hw_idx] == instance_divisor) - break; - } - - /* create one if there is no matching hardware vb */ - if (hw_idx >= ve->vb_count) { - hw_idx = ve->vb_count++; - - ve->vb_mapping[hw_idx] = pipe_idx; - ve->instance_divisors[hw_idx] = instance_divisor; - } - - ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]); - } -} - -void -ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev, - struct ilo_ve_cso *cso) -{ - int format; - - ILO_DEV_ASSERT(dev, 6, 8); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 94: - * - * "- This bit (Edge Flag Enable) must only be ENABLED on the last - * valid VERTEX_ELEMENT structure. - * - * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, - * and Component 1-3 Control must be set to VFCOMP_NOSTORE. - * - * - The Source Element Format must be set to the UINT format. 
- * - * - [DevSNB]: Edge Flags are not supported for QUADLIST - * primitives. Software may elect to convert QUADLIST primitives - * to some set of corresponding edge-flag-supported primitive - * types (e.g., POLYGONs) prior to submission to the 3D pipeline." - */ - cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE; - - /* - * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via - * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined - * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. - * - * Since all the hardware cares about is whether the flags are zero or not, - * we can treat them as the corresponding _UINT formats. - */ - format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT); - cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK; - - switch (format) { - case GEN6_FORMAT_R32_FLOAT: - format = GEN6_FORMAT_R32_UINT; - break; - case GEN6_FORMAT_R8_USCALED: - format = GEN6_FORMAT_R8_UINT; - break; - default: - break; - } - - cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_DW0_FORMAT); - - cso->payload[1] = - GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT; -} - -void -ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev, - int comp0, int comp1, int comp2, int comp3, - struct ilo_ve_cso *cso) -{ - ILO_DEV_ASSERT(dev, 6, 8); - - STATIC_ASSERT(Elements(cso->payload) >= 2); - - assert(comp0 != GEN6_VFCOMP_STORE_SRC && - comp1 != GEN6_VFCOMP_STORE_SRC && - comp2 != GEN6_VFCOMP_STORE_SRC && - comp3 != GEN6_VFCOMP_STORE_SRC); - - cso->payload[0] = GEN6_VE_DW0_VALID; - cso->payload[1] = - comp0 << GEN6_VE_DW1_COMP0__SHIFT | - comp1 << GEN6_VE_DW1_COMP1__SHIFT | - comp2 << GEN6_VE_DW1_COMP2__SHIFT | - comp3 << GEN6_VE_DW1_COMP3__SHIFT; -} - void ilo_gpe_init_vs_cso(const struct ilo_dev *dev, const struct ilo_shader_state *vs, diff --git a/src/gallium/drivers/ilo/ilo_blitter.h 
b/src/gallium/drivers/ilo/ilo_blitter.h index c257c6048e1..dd56472a8d3 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -58,9 +58,10 @@ struct ilo_blitter { bool initialized; float vertices[3][2]; - struct ilo_ve_state ve; struct pipe_draw_info draw; + uint32_t vf_data[2]; + struct ilo_state_vf vf; struct ilo_state_sol sol; struct ilo_state_viewport vp; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index b106e79958a..ed9057a93bd 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -40,30 +40,25 @@ static bool ilo_blitter_set_invariants(struct ilo_blitter *blitter) { - struct pipe_vertex_element velem; + struct ilo_state_vf_element_info elem; if (blitter->initialized) return true; - /* only vertex X and Y */ - memset(&velem, 0, sizeof(velem)); - velem.src_format = PIPE_FORMAT_R32G32_FLOAT; - ilo_gpe_init_ve(blitter->ilo->dev, 1, &velem, &blitter->ve); - - /* generate VUE header */ - ilo_gpe_init_ve_nosrc(blitter->ilo->dev, - GEN6_VFCOMP_STORE_0, /* Reserved */ - GEN6_VFCOMP_STORE_0, /* Render Target Array Index */ - GEN6_VFCOMP_STORE_0, /* Viewport Index */ - GEN6_VFCOMP_STORE_0, /* Point Width */ - &blitter->ve.nosrc_cso); - blitter->ve.prepend_nosrc_cso = true; - /* a rectangle has 3 vertices in a RECTLIST */ util_draw_init_info(&blitter->draw); blitter->draw.mode = ILO_PRIM_RECTANGLES; blitter->draw.count = 3; + memset(&elem, 0, sizeof(elem)); + /* only vertex X and Y */ + elem.format = GEN6_FORMAT_R32G32_FLOAT; + elem.format_size = 8; + elem.component_count = 2; + + ilo_state_vf_init_for_rectlist(&blitter->vf, blitter->ilo->dev, + blitter->vf_data, sizeof(blitter->vf_data), &elem, 1); + ilo_state_sol_init_disabled(&blitter->sol, blitter->ilo->dev, false); /** @@ -79,7 +74,7 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter) blitter->vp_data, sizeof(blitter->vp_data)); 
ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev, - blitter->ve.count + blitter->ve.prepend_nosrc_cso); + ilo_state_vf_get_attr_count(&blitter->vf)); blitter->initialized = true; diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c index 0fd19e3fdff..c3b53724a28 100644 --- a/src/gallium/drivers/ilo/ilo_render.c +++ b/src/gallium/drivers/ilo/ilo_render.c @@ -449,6 +449,7 @@ draw_session_prepare(struct ilo_render *render, session->primitive_restart_changed = true; ilo_state_urb_full_delta(&vec->urb, render->dev, &session->urb_delta); + ilo_state_vf_full_delta(&vec->ve->vf, render->dev, &session->vf_delta); ilo_state_raster_full_delta(&vec->rasterizer->rs, render->dev, &session->rs_delta); @@ -467,6 +468,11 @@ draw_session_prepare(struct ilo_render *render, ilo_state_urb_get_delta(&vec->urb, render->dev, &render->state.urb, &session->urb_delta); + if (vec->dirty & ILO_DIRTY_VE) { + ilo_state_vf_full_delta(&vec->ve->vf, render->dev, + &session->vf_delta); + } + if (vec->dirty & ILO_DIRTY_RASTERIZER) { ilo_state_raster_get_delta(&vec->rasterizer->rs, render->dev, &render->state.rs, &session->rs_delta); diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index 74c13801767..439d4326852 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -150,6 +150,7 @@ struct ilo_render_draw_session { bool primitive_restart_changed; struct ilo_state_urb_delta urb_delta; + struct ilo_state_vf_delta vf_delta; struct ilo_state_raster_delta rs_delta; struct ilo_state_viewport_delta vp_delta; struct ilo_state_cc_delta cc_delta; diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 9d199955be3..7b4740e3693 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -434,12 +434,14 @@ gen6_draw_vf(struct ilo_render *r, } /* 3DSTATE_VERTEX_BUFFERS */ - 
if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) - gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb); + if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) { + gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->vb, vec->ve->vb_mapping, + vec->ve->instance_divisors, vec->ve->vb_count); + } /* 3DSTATE_VERTEX_ELEMENTS */ - if (DIRTY(VE)) - gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, vec->ve); + if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS) + gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &vec->ve->vf); } void @@ -873,7 +875,7 @@ ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r, session->vb_start, session->vb_end, sizeof(blitter->vertices[0])); - gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve); + gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->vf); gen6_3DSTATE_URB(r->builder, &blitter->urb); diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index f5c1a82d671..8aea76df095 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -762,7 +762,7 @@ ilo_render_emit_rectlist_commands_gen7(struct ilo_render *r, session->vb_start, session->vb_end, sizeof(blitter->vertices[0])); - gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve); + gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->vf); gen7_rectlist_pcb_alloc(r, blitter); diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index e0e1a854ebc..689e6985eb4 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -219,23 +219,25 @@ gen8_draw_vf(struct ilo_render *r, } /* 3DSTATE_VERTEX_BUFFERS */ - if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) - gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb); + if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) { + gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->vb, vec->ve->vb_mapping, + vec->ve->instance_divisors, vec->ve->vb_count); + } /* 
3DSTATE_VERTEX_ELEMENTS */ - if (DIRTY(VE)) - gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, vec->ve); + if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS) + gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &vec->ve->vf); - gen8_3DSTATE_VF_TOPOLOGY(r->builder, vec->draw->mode); + gen8_3DSTATE_VF_TOPOLOGY(r->builder, + gen6_3d_translate_pipe_prim(vec->draw->mode)); for (i = 0; i < vec->ve->vb_count; i++) { gen8_3DSTATE_VF_INSTANCING(r->builder, i, vec->ve->instance_divisors[i]); } - gen8_3DSTATE_VF_SGVS(r->builder, - false, 0, 0, - false, 0, 0); + if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF_SGVS) + gen8_3DSTATE_VF_SGVS(r->builder, &vec->ve->vf); } void diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 896402c9109..a164c4cdefa 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -423,57 +423,32 @@ finalize_index_buffer(struct ilo_context *ilo) static void finalize_vertex_elements(struct ilo_context *ilo) { + const struct ilo_dev *dev = ilo->dev; struct ilo_state_vector *vec = &ilo->state_vector; + struct ilo_ve_state *ve = vec->ve; + const bool is_quad = (vec->draw->mode == PIPE_PRIM_QUADS || + vec->draw->mode == PIPE_PRIM_QUAD_STRIP); + const bool last_element_edge_flag = (vec->vs && + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG)); + const bool prepend_vertexid = (vec->vs && + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_VERTEXID)); + const bool prepend_instanceid = (vec->vs && + ilo_shader_get_kernel_param(vec->vs, + ILO_KERNEL_VS_INPUT_INSTANCEID)); - if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS))) - return; + /* check for non-orthogonal states */ + if (ve->vf_params.cv_is_quad != is_quad || + ve->vf_params.prepend_vertexid != prepend_vertexid || + ve->vf_params.prepend_instanceid != prepend_instanceid || + ve->vf_params.last_element_edge_flag != last_element_edge_flag) { + ve->vf_params.cv_is_quad = is_quad; + 
ve->vf_params.prepend_vertexid = prepend_vertexid; + ve->vf_params.prepend_instanceid = prepend_instanceid; + ve->vf_params.last_element_edge_flag = last_element_edge_flag; - vec->dirty |= ILO_DIRTY_VE; + ilo_state_vf_set_params(&ve->vf, dev, &ve->vf_params); - vec->ve->last_cso_edgeflag = false; - if (vec->ve->count && vec->vs && - ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG)) { - vec->ve->edgeflag_cso = vec->ve->cso[vec->ve->count - 1]; - ilo_gpe_set_ve_edgeflag(ilo->dev, &vec->ve->edgeflag_cso); - vec->ve->last_cso_edgeflag = true; - } - - vec->ve->prepend_nosrc_cso = false; - if (vec->vs && - (ilo_shader_get_kernel_param(vec->vs, - ILO_KERNEL_VS_INPUT_INSTANCEID) || - ilo_shader_get_kernel_param(vec->vs, - ILO_KERNEL_VS_INPUT_VERTEXID))) { - ilo_gpe_init_ve_nosrc(ilo->dev, - GEN6_VFCOMP_STORE_VID, - GEN6_VFCOMP_STORE_IID, - GEN6_VFCOMP_NOSTORE, - GEN6_VFCOMP_NOSTORE, - &vec->ve->nosrc_cso); - vec->ve->prepend_nosrc_cso = true; - } else if (!vec->vs) { - /* generate VUE header */ - ilo_gpe_init_ve_nosrc(ilo->dev, - GEN6_VFCOMP_STORE_0, /* Reserved */ - GEN6_VFCOMP_STORE_0, /* Render Target Array Index */ - GEN6_VFCOMP_STORE_0, /* Viewport Index */ - GEN6_VFCOMP_STORE_0, /* Point Width */ - &vec->ve->nosrc_cso); - vec->ve->prepend_nosrc_cso = true; - } else if (!vec->ve->count) { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 92: - * - * "SW must ensure that at least one vertex element is defined prior - * to issuing a 3DPRIMTIVE command, or operation is UNDEFINED." 
- */ - ilo_gpe_init_ve_nosrc(ilo->dev, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_1_FP, - &vec->ve->nosrc_cso); - vec->ve->prepend_nosrc_cso = true; + vec->dirty |= ILO_DIRTY_VE; } } @@ -491,8 +466,7 @@ finalize_urb(struct ilo_context *ilo) memset(&info, 0, sizeof(info)); - info.ve_entry_size = attr_size * - (vec->ve->count + vec->ve->prepend_nosrc_cso); + info.ve_entry_size = attr_size * ilo_state_vf_get_attr_count(&vec->ve->vf); if (vec->vs) { info.vs_const_data = (bool) @@ -1319,12 +1293,58 @@ ilo_create_vertex_elements_state(struct pipe_context *pipe, const struct pipe_vertex_element *elements) { const struct ilo_dev *dev = ilo_context(pipe)->dev; + struct ilo_state_vf_element_info vf_elements[PIPE_MAX_ATTRIBS]; + struct ilo_state_vf_info vf_info; struct ilo_ve_state *ve; + unsigned i; - ve = MALLOC_STRUCT(ilo_ve_state); + ve = CALLOC_STRUCT(ilo_ve_state); assert(ve); - ilo_gpe_init_ve(dev, num_elements, elements, ve); + for (i = 0; i < num_elements; i++) { + const struct pipe_vertex_element *elem = &elements[i]; + struct ilo_state_vf_element_info *attr = &vf_elements[i]; + unsigned hw_idx; + + /* + * map the pipe vb to the hardware vb, which has a fixed instance + * divisor + */ + for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { + if (ve->vb_mapping[hw_idx] == elem->vertex_buffer_index && + ve->instance_divisors[hw_idx] == elem->instance_divisor) + break; + } + + /* create one if there is no matching hardware vb */ + if (hw_idx >= ve->vb_count) { + hw_idx = ve->vb_count++; + + ve->vb_mapping[hw_idx] = elem->vertex_buffer_index; + ve->instance_divisors[hw_idx] = elem->instance_divisor; + } + + attr->buffer = hw_idx; + attr->vertex_offset = elem->src_offset; + attr->format = ilo_format_translate_vertex(dev, elem->src_format); + attr->format_size = util_format_get_blocksize(elem->src_format); + attr->component_count = util_format_get_nr_components(elem->src_format); + attr->is_integer = 
util_format_is_pure_integer(elem->src_format); + attr->is_double = (util_format_is_float(elem->src_format) && + attr->format_size == attr->component_count * 8); + } + + memset(&vf_info, 0, sizeof(vf_info)); + vf_info.data = ve->vf_data; + vf_info.data_size = sizeof(ve->vf_data); + vf_info.elements = vf_elements; + vf_info.element_count = num_elements; + /* vf_info.params and ve->vf_params are both zeroed */ + + if (!ilo_state_vf_init(&ve->vf, dev, &vf_info)) { + FREE(ve); + return NULL; + } return ve; } diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 908585a507e..f504e0732aa 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -35,6 +35,7 @@ #include "core/ilo_state_sol.h" #include "core/ilo_state_surface.h" #include "core/ilo_state_urb.h" +#include "core/ilo_state_vf.h" #include "core/ilo_state_viewport.h" #include "core/ilo_state_zs.h" #include "pipe/p_state.h" @@ -129,6 +130,17 @@ enum ilo_dirty_flags { struct ilo_context; +struct ilo_ve_state { + unsigned vb_mapping[PIPE_MAX_ATTRIBS]; + unsigned instance_divisors[PIPE_MAX_ATTRIBS]; + unsigned vb_count; + + /* these are not valid until the state is finalized */ + uint32_t vf_data[PIPE_MAX_ATTRIBS][2]; + struct ilo_state_vf_params_info vf_params; + struct ilo_state_vf vf; +}; + struct ilo_cbuf_cso { struct pipe_resource *resource; struct ilo_state_surface_buffer_info info; From da6e45fcbc4570df0ec4b8c8885f33a206da3552 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Tue, 2 Jun 2015 23:09:53 +0800 Subject: [PATCH 629/834] ilo: embed ilo_state_sbe in ilo_shader --- .../drivers/ilo/core/ilo_builder_3d_bottom.h | 167 ++++------------- src/gallium/drivers/ilo/ilo_blitter.h | 1 + .../drivers/ilo/ilo_blitter_rectlist.c | 2 + src/gallium/drivers/ilo/ilo_render_gen6.c | 9 +- src/gallium/drivers/ilo/ilo_render_gen7.c | 8 +- src/gallium/drivers/ilo/ilo_render_gen8.c | 14 +- src/gallium/drivers/ilo/ilo_shader.c | 175 +++++++++--------- 
src/gallium/drivers/ilo/ilo_shader.h | 20 +- src/gallium/drivers/ilo/ilo_state.c | 2 +- src/gallium/drivers/ilo/ilo_state.h | 1 + .../drivers/ilo/shader/ilo_shader_internal.h | 18 +- 11 files changed, 166 insertions(+), 251 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index cd1a6821ca6..fc63c80c2ce 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -37,6 +37,7 @@ #include "ilo_format.h" #include "ilo_state_cc.h" #include "ilo_state_raster.h" +#include "ilo_state_sbe.h" #include "ilo_state_viewport.h" #include "ilo_builder.h" #include "ilo_builder_3d_top.h" @@ -59,126 +60,21 @@ gen6_3DSTATE_CLIP(struct ilo_builder *builder, dw[3] = rs->clip[2]; } -static inline void -gen8_internal_3dstate_sbe(struct ilo_builder *builder, - uint8_t cmd_len, uint32_t *dw, - const struct ilo_shader_state *fs, - int sprite_coord_mode) -{ - const struct ilo_kernel_routing *routing; - int vue_offset, vue_len, out_count; - - ILO_DEV_ASSERT(builder->dev, 6, 8); - - assert(cmd_len == 4); - - dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2); - - if (!fs) { - dw[1] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT; - dw[2] = 0; - dw[3] = 0; - return; - } - - routing = ilo_shader_get_kernel_routing(fs); - - vue_offset = routing->source_skip; - assert(vue_offset % 2 == 0); - vue_offset /= 2; - - vue_len = (routing->source_len + 1) / 2; - if (!vue_len) - vue_len = 1; - - out_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); - assert(out_count <= 32); - - dw[1] = out_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT | - vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { - dw[1] |= GEN8_SBE_DW1_USE_URB_READ_LEN | - GEN8_SBE_DW1_USE_URB_READ_OFFSET | - vue_offset << GEN8_SBE_DW1_URB_READ_OFFSET__SHIFT; - } else { - dw[1] |= vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT; - } - - if 
(routing->swizzle_enable) - dw[1] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE; - - switch (sprite_coord_mode) { - case PIPE_SPRITE_COORD_UPPER_LEFT: - dw[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT; - break; - case PIPE_SPRITE_COORD_LOWER_LEFT: - dw[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT; - break; - } - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 268: - * - * "This field (Point Sprite Texture Coordinate Enable) must be - * programmed to 0 when non-point primitives are rendered." - * - * TODO We do not check that yet. - */ - dw[2] = routing->point_sprite_enable; - - dw[3] = routing->const_interp_enable; -} - -static inline void -gen8_internal_3dstate_sbe_swiz(struct ilo_builder *builder, - uint8_t cmd_len, uint32_t *dw, - const struct ilo_shader_state *fs) -{ - const struct ilo_kernel_routing *routing; - - ILO_DEV_ASSERT(builder->dev, 6, 8); - - assert(cmd_len == 11); - - dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SBE_SWIZ) | (cmd_len - 2); - - if (!fs) { - memset(&dw[1], 0, sizeof(*dw) * (cmd_len - 1)); - return; - } - - routing = ilo_shader_get_kernel_routing(fs); - - STATIC_ASSERT(sizeof(routing->swizzles) >= sizeof(*dw) * 8); - memcpy(&dw[1], routing->swizzles, sizeof(*dw) * 8); - - /* WrapShortest enables */ - dw[9] = 0; - dw[10] = 0; -} - static inline void gen6_3DSTATE_SF(struct ilo_builder *builder, const struct ilo_state_raster *rs, - unsigned sprite_coord_mode, - const struct ilo_shader_state *fs) + const struct ilo_state_sbe *sbe) { const uint8_t cmd_len = 20; - uint32_t gen8_3dstate_sbe[4], gen8_3dstate_sbe_swiz[11]; uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 6); - gen8_internal_3dstate_sbe(builder, Elements(gen8_3dstate_sbe), - gen8_3dstate_sbe, fs, sprite_coord_mode); - gen8_internal_3dstate_sbe_swiz(builder, Elements(gen8_3dstate_sbe_swiz), - gen8_3dstate_sbe_swiz, fs); - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); - dw[1] = gen8_3dstate_sbe[1]; + /* see 
sbe_set_gen8_3DSTATE_SBE() */ + dw[1] = sbe->sbe[0]; /* see raster_set_gen7_3DSTATE_SF() */ dw[2] = rs->sf[0]; @@ -188,11 +84,14 @@ gen6_3DSTATE_SF(struct ilo_builder *builder, dw[6] = rs->raster[2]; dw[7] = rs->raster[3]; - memcpy(&dw[8], &gen8_3dstate_sbe_swiz[1], sizeof(*dw) * 8); - dw[16] = gen8_3dstate_sbe[2]; - dw[17] = gen8_3dstate_sbe[3]; - dw[18] = gen8_3dstate_sbe_swiz[9]; - dw[19] = gen8_3dstate_sbe_swiz[10]; + /* see sbe_set_gen8_3DSTATE_SBE_SWIZ() */ + memcpy(&dw[8], sbe->swiz, sizeof(*dw) * 8); + + dw[16] = sbe->sbe[1]; + dw[17] = sbe->sbe[2]; + /* WrapShortest enables */ + dw[18] = 0; + dw[19] = 0; } static inline void @@ -221,35 +120,30 @@ gen7_3DSTATE_SF(struct ilo_builder *builder, static inline void gen7_3DSTATE_SBE(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - int sprite_coord_mode) + const struct ilo_state_sbe *sbe) { const uint8_t cmd_len = 14; - uint32_t gen8_3dstate_sbe[4], gen8_3dstate_sbe_swiz[11]; uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - gen8_internal_3dstate_sbe(builder, Elements(gen8_3dstate_sbe), - gen8_3dstate_sbe, fs, sprite_coord_mode); - gen8_internal_3dstate_sbe_swiz(builder, Elements(gen8_3dstate_sbe_swiz), - gen8_3dstate_sbe_swiz, fs); - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2); - dw[1] = gen8_3dstate_sbe[1]; - memcpy(&dw[2], &gen8_3dstate_sbe_swiz[1], sizeof(*dw) * 8); - dw[10] = gen8_3dstate_sbe[2]; - dw[11] = gen8_3dstate_sbe[3]; - dw[12] = gen8_3dstate_sbe_swiz[9]; - dw[13] = gen8_3dstate_sbe_swiz[10]; + /* see sbe_set_gen8_3DSTATE_SBE() and sbe_set_gen8_3DSTATE_SBE_SWIZ() */ + dw[1] = sbe->sbe[0]; + memcpy(&dw[2], sbe->swiz, sizeof(*dw) * 8); + dw[10] = sbe->sbe[1]; + dw[11] = sbe->sbe[2]; + + /* WrapShortest enables */ + dw[12] = 0; + dw[13] = 0; } static inline void gen8_3DSTATE_SBE(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - int sprite_coord_mode) + const struct ilo_state_sbe *sbe) { const uint8_t 
cmd_len = 4; uint32_t *dw; @@ -258,12 +152,16 @@ gen8_3DSTATE_SBE(struct ilo_builder *builder, ilo_builder_batch_pointer(builder, cmd_len, &dw); - gen8_internal_3dstate_sbe(builder, cmd_len, dw, fs, sprite_coord_mode); + /* see sbe_set_gen8_3DSTATE_SBE() */ + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2); + dw[1] = sbe->sbe[0]; + dw[2] = sbe->sbe[1]; + dw[3] = sbe->sbe[2]; } static inline void gen8_3DSTATE_SBE_SWIZ(struct ilo_builder *builder, - const struct ilo_shader_state *fs) + const struct ilo_state_sbe *sbe) { const uint8_t cmd_len = 11; uint32_t *dw; @@ -272,7 +170,12 @@ gen8_3DSTATE_SBE_SWIZ(struct ilo_builder *builder, ilo_builder_batch_pointer(builder, cmd_len, &dw); - gen8_internal_3dstate_sbe_swiz(builder, cmd_len, dw, fs); + dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SBE_SWIZ) | (cmd_len - 2); + /* see sbe_set_gen8_3DSTATE_SBE_SWIZ() */ + memcpy(&dw[1], sbe->swiz, sizeof(*dw) * 8); + /* WrapShortest enables */ + dw[9] = 0; + dw[10] = 0; } static inline void diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index dd56472a8d3..550e465a329 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -67,6 +67,7 @@ struct ilo_blitter { struct ilo_state_viewport vp; uint32_t vp_data[20]; + struct ilo_state_sbe sbe; struct ilo_state_cc cc; uint32_t depth_clear_value; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index ed9057a93bd..873f3e4c3e8 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -73,6 +73,8 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter) ilo_state_viewport_init_for_rectlist(&blitter->vp, blitter->ilo->dev, blitter->vp_data, sizeof(blitter->vp_data)); + ilo_state_sbe_init_for_rectlist(&blitter->sbe, blitter->ilo->dev, 0, 0); + ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev, 
ilo_state_vf_get_attr_count(&blitter->vf)); diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 7b4740e3693..d659ab588ac 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -599,10 +599,9 @@ gen6_draw_sf(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_SF */ - if ((session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF) || - DIRTY(RASTERIZER) || DIRTY(FS)) { - gen6_3DSTATE_SF(r->builder, &vec->rasterizer->rs, - vec->rasterizer->state.sprite_coord_mode, vec->fs); + if ((session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF) || DIRTY(FS)) { + const struct ilo_state_sbe *sbe = ilo_shader_get_kernel_sbe(vec->fs); + gen6_3DSTATE_SF(r->builder, &vec->rasterizer->rs, sbe); } } @@ -799,7 +798,7 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r, gen6_disable_3DSTATE_GS(r->builder); gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs); - gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, 0, NULL); + gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 8aea76df095..3c080a52a89 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -457,9 +457,9 @@ gen7_draw_sf(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_SBE */ - if (DIRTY(RASTERIZER) || DIRTY(FS)) { - gen7_3DSTATE_SBE(r->builder, vec->fs, (vec->rasterizer) ? 
- vec->rasterizer->state.sprite_coord_mode : 0); + if (DIRTY(FS)) { + const struct ilo_state_sbe *sbe = ilo_shader_get_kernel_sbe(vec->fs); + gen7_3DSTATE_SBE(r->builder, sbe); } /* 3DSTATE_SF */ @@ -684,7 +684,7 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r, gen7_wa_pre_3dstate_sf_depth_bias(r); gen7_3DSTATE_SF(r->builder, &blitter->fb.rs); - gen7_3DSTATE_SBE(r->builder, NULL, 0); + gen7_3DSTATE_SBE(r->builder, &blitter->sbe); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 689e6985eb4..691c378c864 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -68,15 +68,13 @@ gen8_draw_sf(struct ilo_render *r, if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_RASTER) gen8_3DSTATE_RASTER(r->builder, &vec->rasterizer->rs); - /* 3DSTATE_SBE */ - if (DIRTY(RASTERIZER) || DIRTY(FS)) { - gen8_3DSTATE_SBE(r->builder, vec->fs, (vec->rasterizer) ? - vec->rasterizer->state.sprite_coord_mode : 0); - } + /* 3DSTATE_SBE and 3DSTATE_SBE_SWIZ */ + if (DIRTY(FS)) { + const struct ilo_state_sbe *sbe = ilo_shader_get_kernel_sbe(vec->fs); - /* 3DSTATE_SBE_SWIZ */ - if (DIRTY(FS)) - gen8_3DSTATE_SBE_SWIZ(r->builder, vec->fs); + gen8_3DSTATE_SBE(r->builder, sbe); + gen8_3DSTATE_SBE_SWIZ(r->builder, sbe); + } /* 3DSTATE_SF */ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF) diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index e9eb042ebc8..46f39c19b15 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -897,82 +897,103 @@ route_attr(const int *semantics, const int *indices, int len, * \return true if a different routing is selected */ bool -ilo_shader_select_kernel_routing(struct ilo_shader_state *shader, - const struct ilo_shader_state *source, - const struct ilo_rasterizer_state *rasterizer) +ilo_shader_select_kernel_sbe(struct ilo_shader_state *shader, + const struct 
ilo_shader_state *source, + const struct ilo_rasterizer_state *rasterizer) { - const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable; + const bool is_point = true; const bool light_twoside = rasterizer->state.light_twoside; + const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable; + const int sprite_coord_mode = rasterizer->state.sprite_coord_mode; struct ilo_shader *kernel = shader->shader; struct ilo_kernel_routing *routing = &kernel->routing; + struct ilo_state_sbe_swizzle_info swizzles[ILO_STATE_SBE_MAX_SWIZZLE_COUNT]; + struct ilo_state_sbe_info info; const int *src_semantics, *src_indices; - int src_len, max_src_slot; + int src_skip, src_len, src_slot; int dst_len, dst_slot; - /* we are constructing 3DSTATE_SBE here */ - ILO_DEV_ASSERT(shader->info.dev, 6, 8); - assert(kernel); if (source) { assert(source->shader); + src_semantics = source->shader->out.semantic_names; src_indices = source->shader->out.semantic_indices; src_len = source->shader->out.count; - } - else { + + assert(src_len >= 2 && + src_semantics[0] == TGSI_SEMANTIC_PSIZE && + src_semantics[1] == TGSI_SEMANTIC_POSITION); + + /* + * skip PSIZE and POSITION (how about the optional CLIPDISTs?), unless + * they are all the source shader has and FS needs to read some + * attributes. 
+ */ + if (src_len > 2 || !kernel->in.count) { + src_semantics += 2; + src_indices += 2; + src_len -= 2; + src_skip = 2; + } + } else { src_semantics = kernel->in.semantic_names; src_indices = kernel->in.semantic_indices; src_len = kernel->in.count; + src_skip = 0; } /* no change */ - if (kernel->routing_initialized && - routing->source_skip + routing->source_len <= src_len && - kernel->routing_sprite_coord_enable == sprite_coord_enable && - !memcmp(kernel->routing_src_semantics, - &src_semantics[routing->source_skip], - sizeof(kernel->routing_src_semantics[0]) * routing->source_len) && - !memcmp(kernel->routing_src_indices, - &src_indices[routing->source_skip], - sizeof(kernel->routing_src_indices[0]) * routing->source_len)) + if (routing->initialized && + routing->is_point == is_point && + routing->light_twoside == light_twoside && + routing->sprite_coord_enable == sprite_coord_enable && + routing->sprite_coord_mode == sprite_coord_mode && + routing->src_len <= src_len && + !memcmp(routing->src_semantics, src_semantics, + sizeof(src_semantics[0]) * routing->src_len) && + !memcmp(routing->src_indices, src_indices, + sizeof(src_indices[0]) * routing->src_len)) return false; - if (source) { - /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) 
*/ - assert(src_semantics[0] == TGSI_SEMANTIC_PSIZE); - assert(src_semantics[1] == TGSI_SEMANTIC_POSITION); - routing->source_skip = 2; + routing->is_point = is_point; + routing->light_twoside = light_twoside; + routing->sprite_coord_enable = sprite_coord_enable; + routing->sprite_coord_mode = sprite_coord_mode; - routing->source_len = src_len - routing->source_skip; - src_semantics += routing->source_skip; - src_indices += routing->source_skip; - } - else { - routing->source_skip = 0; - routing->source_len = src_len; - } + assert(kernel->in.count <= Elements(swizzles)); + dst_len = MIN2(kernel->in.count, Elements(swizzles)); - routing->const_interp_enable = kernel->in.const_interp_enable; - routing->point_sprite_enable = 0; - routing->swizzle_enable = false; + memset(&info, 0, sizeof(info)); + memset(&swizzles, 0, sizeof(swizzles)); - assert(kernel->in.count <= Elements(routing->swizzles)); - dst_len = MIN2(kernel->in.count, Elements(routing->swizzles)); - max_src_slot = -1; + info.attr_count = dst_len; + info.cv_vue_attr_count = src_skip + src_len; + info.vue_read_base = src_skip; + info.vue_read_count = 0; + info.has_min_read_count = true; + info.swizzle_enable = false; + info.swizzle_16_31 = false; + info.swizzle_count = 0; + info.swizzles = swizzles; + info.const_interp_enables = kernel->in.const_interp_enable; + info.point_sprite_enables = 0x0; + info.point_sprite_origin_lower_left = + (sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); + info.cv_is_point = is_point; for (dst_slot = 0; dst_slot < dst_len; dst_slot++) { const int semantic = kernel->in.semantic_names[dst_slot]; const int index = kernel->in.semantic_indices[dst_slot]; - int src_slot; if (semantic == TGSI_SEMANTIC_GENERIC && (sprite_coord_enable & (1 << index))) - routing->point_sprite_enable |= 1 << dst_slot; + info.point_sprite_enables |= 1 << dst_slot; if (source) { - src_slot = route_attr(src_semantics, src_indices, - routing->source_len, semantic, index); + src_slot = 
route_attr(src_semantics, src_indices, src_len, + semantic, index); /* * The source shader stage does not output this attribute. The value @@ -986,59 +1007,47 @@ ilo_shader_select_kernel_routing(struct ilo_shader_state *shader, */ if (src_slot < 0) src_slot = 0; - } - else { + } else { src_slot = dst_slot; } - routing->swizzles[dst_slot] = src_slot; - /* use the following slot for two-sided lighting */ if (semantic == TGSI_SEMANTIC_COLOR && light_twoside && - src_slot + 1 < routing->source_len && + src_slot + 1 < src_len && src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR && src_indices[src_slot + 1] == index) { - routing->swizzles[dst_slot] |= GEN6_INPUTATTR_FACING << - GEN8_SBE_SWIZ_SWIZZLE_SELECT__SHIFT; + swizzles[dst_slot].attr_select = GEN6_INPUTATTR_FACING; + swizzles[dst_slot].attr = src_slot; + info.swizzle_enable = true; src_slot++; + } else { + swizzles[dst_slot].attr_select = GEN6_INPUTATTR_NORMAL; + swizzles[dst_slot].attr = src_slot; + if (src_slot != dst_slot) + info.swizzle_enable = true; } - if (routing->swizzles[dst_slot] != dst_slot) - routing->swizzle_enable = true; + swizzles[dst_slot].force_zeros = false; - if (max_src_slot < src_slot) - max_src_slot = src_slot; + if (info.vue_read_count < src_slot + 1) + info.vue_read_count = src_slot + 1; } - memset(&routing->swizzles[dst_slot], 0, sizeof(routing->swizzles) - - sizeof(routing->swizzles[0]) * dst_slot); + if (info.swizzle_enable) + info.swizzle_count = dst_len; - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 248: - * - * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to - * 0 indicating no Vertex URB data to be read. - * - * This field should be set to the minimum length required to read the - * maximum source attribute. The maximum source attribute is indicated - * by the maximum value of the enabled Attribute # Source Attribute if - * Attribute Swizzle Enable is set, Number of Output Attributes-1 if - * enable is not set. 
- * - * read_length = ceiling((max_source_attr+1)/2) - * - * [errata] Corruption/Hang possible if length programmed larger than - * recommended" - */ - routing->source_len = max_src_slot + 1; + if (routing->initialized) + ilo_state_sbe_set_info(&routing->sbe, shader->info.dev, &info); + else + ilo_state_sbe_init(&routing->sbe, shader->info.dev, &info); - /* remember the states of the source */ - kernel->routing_initialized = true; - kernel->routing_sprite_coord_enable = sprite_coord_enable; - memcpy(kernel->routing_src_semantics, src_semantics, - sizeof(kernel->routing_src_semantics[0]) * routing->source_len); - memcpy(kernel->routing_src_indices, src_indices, - sizeof(kernel->routing_src_indices[0]) * routing->source_len); + routing->src_len = info.vue_read_count; + memcpy(routing->src_semantics, src_semantics, + sizeof(src_semantics[0]) * routing->src_len); + memcpy(routing->src_indices, src_indices, + sizeof(src_indices[0]) * routing->src_len); + + routing->initialized = true; return true; } @@ -1248,12 +1257,12 @@ ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader) /** * Return the routing info of the selected kernel. 
*/ -const struct ilo_kernel_routing * -ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader) +const struct ilo_state_sbe * +ilo_shader_get_kernel_sbe(const struct ilo_shader_state *shader) { const struct ilo_shader *kernel = shader->shader; assert(kernel); - return &kernel->routing; + return &kernel->routing.sbe; } diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h index ddcd6f0356f..457f847bb0c 100644 --- a/src/gallium/drivers/ilo/ilo_shader.h +++ b/src/gallium/drivers/ilo/ilo_shader.h @@ -81,21 +81,13 @@ enum ilo_kernel_param { ILO_KERNEL_PARAM_COUNT, }; -struct ilo_kernel_routing { - uint32_t const_interp_enable; - uint32_t point_sprite_enable; - unsigned source_skip, source_len; - - bool swizzle_enable; - uint16_t swizzles[16]; -}; - struct intel_bo; struct ilo_builder; struct ilo_rasterizer_state; struct ilo_shader_cache; struct ilo_shader_state; struct ilo_shader_cso; +struct ilo_state_sbe; struct ilo_state_sol; struct ilo_state_vector; @@ -152,9 +144,9 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader, uint32_t dirty); bool -ilo_shader_select_kernel_routing(struct ilo_shader_state *shader, - const struct ilo_shader_state *source, - const struct ilo_rasterizer_state *rasterizer); +ilo_shader_select_kernel_sbe(struct ilo_shader_state *shader, + const struct ilo_shader_state *source, + const struct ilo_rasterizer_state *rasterizer); uint32_t ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader); @@ -172,7 +164,7 @@ ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader); const struct ilo_state_sol * ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader); -const struct ilo_kernel_routing * -ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader); +const struct ilo_state_sbe * +ilo_shader_get_kernel_sbe(const struct ilo_shader_state *shader); #endif /* ILO_SHADER_H */ diff --git a/src/gallium/drivers/ilo/ilo_state.c 
b/src/gallium/drivers/ilo/ilo_state.c index a164c4cdefa..82fd0e7df19 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -310,7 +310,7 @@ finalize_shader_states(struct ilo_state_vector *vec) /* need to setup SBE for FS */ if (type == PIPE_SHADER_FRAGMENT && vec->dirty & (state | ILO_DIRTY_GS | ILO_DIRTY_VS | ILO_DIRTY_RASTERIZER)) { - if (ilo_shader_select_kernel_routing(shader, + if (ilo_shader_select_kernel_sbe(shader, (vec->gs) ? vec->gs : vec->vs, vec->rasterizer)) vec->dirty |= state; } diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index f504e0732aa..3ee471e84f1 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -32,6 +32,7 @@ #include "core/ilo_state_cc.h" #include "core/ilo_state_raster.h" #include "core/ilo_state_sampler.h" +#include "core/ilo_state_sbe.h" #include "core/ilo_state_sol.h" #include "core/ilo_state_surface.h" #include "core/ilo_state_urb.h" diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h index 603d13e5766..31f731f29cd 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h +++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h @@ -28,6 +28,7 @@ #ifndef ILO_SHADER_INTERNAL_H #define ILO_SHADER_INTERNAL_H +#include "core/ilo_state_sbe.h" #include "core/ilo_state_sol.h" #include "ilo_common.h" @@ -74,6 +75,19 @@ struct ilo_shader_variant { uint32_t saturate_tex_coords[3]; }; +struct ilo_kernel_routing { + bool initialized; + bool is_point; + bool light_twoside; + uint32_t sprite_coord_enable; + int sprite_coord_mode; + int src_len; + int src_semantics[PIPE_MAX_SHADER_OUTPUTS]; + int src_indices[PIPE_MAX_SHADER_OUTPUTS]; + + struct ilo_state_sbe sbe; +}; + /** * A compiled shader. 
*/ @@ -125,10 +139,6 @@ struct ilo_shader { void *kernel; int kernel_size; - bool routing_initialized; - int routing_src_semantics[PIPE_MAX_SHADER_OUTPUTS]; - int routing_src_indices[PIPE_MAX_SHADER_OUTPUTS]; - uint32_t routing_sprite_coord_enable; struct ilo_kernel_routing routing; /* what does the push constant buffer consist of? */ From 30fcb31c9b095451ce5ac5a10c3c6b177dc03e20 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 12 Jun 2015 14:47:02 +0800 Subject: [PATCH 630/834] ilo: add ilo_state_{vs,hs,ds,gs} to ilo_shader_cso --- src/gallium/drivers/ilo/Makefile.sources | 1 - .../drivers/ilo/core/ilo_builder_3d_bottom.h | 34 +- .../drivers/ilo/core/ilo_builder_3d_top.h | 308 +++++++----------- src/gallium/drivers/ilo/core/ilo_state_3d.h | 27 +- .../drivers/ilo/core/ilo_state_3d_bottom.c | 38 +-- .../drivers/ilo/core/ilo_state_3d_top.c | 251 -------------- src/gallium/drivers/ilo/ilo_blitter.h | 6 + .../drivers/ilo/ilo_blitter_rectlist.c | 4 + src/gallium/drivers/ilo/ilo_render_gen6.c | 48 ++- src/gallium/drivers/ilo/ilo_render_gen7.c | 59 +++- src/gallium/drivers/ilo/ilo_shader.c | 104 +++++- src/gallium/drivers/ilo/ilo_shader.h | 3 +- src/gallium/drivers/ilo/ilo_state.c | 4 + src/gallium/drivers/ilo/ilo_state.h | 5 + .../drivers/ilo/shader/ilo_shader_internal.h | 2 +- 15 files changed, 379 insertions(+), 515 deletions(-) delete mode 100644 src/gallium/drivers/ilo/core/ilo_state_3d_top.c diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 3b38277af19..e5a8ed4a97b 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -22,7 +22,6 @@ C_SOURCES := \ core/ilo_image.h \ core/ilo_state_3d.h \ core/ilo_state_3d_bottom.c \ - core/ilo_state_3d_top.c \ core/ilo_state_cc.c \ core/ilo_state_cc.h \ core/ilo_state_raster.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index fc63c80c2ce..68461fff09d 
100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -217,14 +217,14 @@ gen6_3DSTATE_WM(struct ilo_builder *builder, dw6 = rs->raster[2]; if (fs) { - const struct ilo_shader_cso *cso; + const union ilo_shader_cso *cso; cso = ilo_shader_get_kernel_cso(fs); /* see fs_init_cso_gen6() */ - dw2 |= cso->payload[0]; - dw4 |= cso->payload[1]; - dw5 |= cso->payload[2]; - dw6 |= cso->payload[3]; + dw2 |= cso->ps_payload[0]; + dw4 |= cso->ps_payload[1]; + dw5 |= cso->ps_payload[2]; + dw6 |= cso->ps_payload[3]; } else { const int max_threads = (builder->dev->gt == 2) ? 80 : 40; @@ -271,11 +271,11 @@ gen7_3DSTATE_WM(struct ilo_builder *builder, dw1 = rs->wm[0]; if (fs) { - const struct ilo_shader_cso *cso; + const union ilo_shader_cso *cso; cso = ilo_shader_get_kernel_cso(fs); /* see fs_init_cso_gen7() */ - dw1 |= cso->payload[3]; + dw1 |= cso->ps_payload[3]; } if (cc_may_kill) @@ -383,16 +383,16 @@ gen7_3DSTATE_PS(struct ilo_builder *builder, bool dual_blend) { const uint8_t cmd_len = 8; - const struct ilo_shader_cso *cso; + const union ilo_shader_cso *cso; uint32_t dw2, dw4, dw5, *dw; ILO_DEV_ASSERT(builder->dev, 7, 7.5); /* see fs_init_cso_gen7() */ cso = ilo_shader_get_kernel_cso(fs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; + dw2 = cso->ps_payload[0]; + dw4 = cso->ps_payload[1]; + dw5 = cso->ps_payload[2]; if (dual_blend) dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; @@ -452,16 +452,16 @@ gen8_3DSTATE_PS(struct ilo_builder *builder, const struct ilo_shader_state *fs) { const uint8_t cmd_len = 12; - const struct ilo_shader_cso *cso; + const union ilo_shader_cso *cso; uint32_t dw3, dw6, dw7, *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); /* see fs_init_cso_gen8() */ cso = ilo_shader_get_kernel_cso(fs); - dw3 = cso->payload[0]; - dw6 = cso->payload[1]; - dw7 = cso->payload[2]; + dw3 = cso->ps_payload[0]; + dw6 = cso->ps_payload[1]; + dw7 = cso->ps_payload[2]; 
ilo_builder_batch_pointer(builder, cmd_len, &dw); @@ -485,14 +485,14 @@ gen8_3DSTATE_PS_EXTRA(struct ilo_builder *builder, bool cc_may_kill, bool per_sample) { const uint8_t cmd_len = 2; - const struct ilo_shader_cso *cso; + const union ilo_shader_cso *cso; uint32_t dw1, *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); /* see fs_init_cso_gen8() */ cso = ilo_shader_get_kernel_cso(fs); - dw1 = cso->payload[3]; + dw1 = cso->ps_payload[3]; if (cc_may_kill) dw1 |= GEN8_PSX_DW1_VALID | GEN8_PSX_DW1_KILL_PIXEL; diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index a47c2973480..85b0da7406d 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -30,13 +30,13 @@ #include "genhw/genhw.h" #include "../ilo_resource.h" -#include "../ilo_shader.h" #include "intel_winsys.h" #include "ilo_core.h" #include "ilo_dev.h" #include "ilo_state_3d.h" #include "ilo_state_sampler.h" +#include "ilo_state_shader.h" #include "ilo_state_sol.h" #include "ilo_state_urb.h" #include "ilo_state_vf.h" @@ -546,102 +546,98 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, static inline void gen6_3DSTATE_VS(struct ilo_builder *builder, - const struct ilo_shader_state *vs) + const struct ilo_state_vs *vs, + uint32_t kernel_offset) { const uint8_t cmd_len = 6; - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 7.5); - cso = ilo_shader_get_kernel_cso(vs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); - dw[1] = ilo_shader_get_kernel_offset(vs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; + dw[1] = kernel_offset; + /* see vs_set_gen6_3DSTATE_VS() */ + dw[2] = vs->vs[0]; + dw[3] = vs->vs[1]; + dw[4] = vs->vs[2]; + dw[5] = vs->vs[3]; } static inline void 
gen8_3DSTATE_VS(struct ilo_builder *builder, - const struct ilo_shader_state *vs, - uint32_t clip_plane_enable) + const struct ilo_state_vs *vs, + uint32_t kernel_offset) { const uint8_t cmd_len = 9; - const struct ilo_shader_cso *cso; - uint32_t dw3, dw6, dw7, dw8, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); - cso = ilo_shader_get_kernel_cso(vs); - dw3 = cso->payload[0]; - dw6 = cso->payload[1]; - dw7 = cso->payload[2]; - dw8 = clip_plane_enable << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); - dw[1] = ilo_shader_get_kernel_offset(vs); + dw[1] = kernel_offset; dw[2] = 0; - dw[3] = dw3; - dw[4] = 0; /* scratch */ + /* see vs_set_gen6_3DSTATE_VS() */ + dw[3] = vs->vs[0]; + dw[4] = vs->vs[1]; dw[5] = 0; - dw[6] = dw6; - dw[7] = dw7; - dw[8] = dw8; + dw[6] = vs->vs[2]; + dw[7] = vs->vs[3]; + dw[8] = vs->vs[4]; } static inline void -gen6_disable_3DSTATE_VS(struct ilo_builder *builder) +gen7_3DSTATE_HS(struct ilo_builder *builder, + const struct ilo_state_hs *hs, + uint32_t kernel_offset) { - const uint8_t cmd_len = 6; + const uint8_t cmd_len = 7; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw[4] = 0; - dw[5] = 0; -} - -static inline void -gen7_disable_3DSTATE_HS(struct ilo_builder *builder) -{ - const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 
9 : 7; - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 7, 8); + ILO_DEV_ASSERT(builder->dev, 7, 7.5); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2); - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw[4] = 0; - dw[5] = 0; + /* see hs_set_gen7_3DSTATE_HS() */ + dw[1] = hs->hs[0]; + dw[2] = hs->hs[1]; + dw[3] = kernel_offset; + dw[4] = hs->hs[2]; + dw[5] = hs->hs[3]; dw[6] = 0; - if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { - dw[7] = 0; - dw[8] = 0; - } } static inline void -gen7_3DSTATE_TE(struct ilo_builder *builder) +gen8_3DSTATE_HS(struct ilo_builder *builder, + const struct ilo_state_hs *hs, + uint32_t kernel_offset) +{ + const uint8_t cmd_len = 9; + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 8, 8); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2); + /* see hs_set_gen7_3DSTATE_HS() */ + dw[1] = hs->hs[0]; + dw[2] = hs->hs[1]; + dw[3] = kernel_offset; + dw[4] = 0; + dw[5] = hs->hs[2]; + dw[6] = 0; + dw[7] = hs->hs[3]; + dw[8] = 0; +} + +static inline void +gen7_3DSTATE_TE(struct ilo_builder *builder, + const struct ilo_state_ds *ds) { const uint8_t cmd_len = 4; uint32_t *dw; @@ -651,108 +647,61 @@ gen7_3DSTATE_TE(struct ilo_builder *builder) ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_TE) | (cmd_len - 2); - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; + /* see ds_set_gen7_3DSTATE_TE() */ + dw[1] = ds->te[0]; + dw[2] = ds->te[1]; + dw[3] = ds->te[2]; } static inline void -gen7_disable_3DSTATE_DS(struct ilo_builder *builder) +gen7_3DSTATE_DS(struct ilo_builder *builder, + const struct ilo_state_ds *ds, + uint32_t kernel_offset) { - const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 
9 : 6; + const uint8_t cmd_len = 6; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 7, 8); + ILO_DEV_ASSERT(builder->dev, 7, 7.5); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2); - dw[1] = 0; + /* see ds_set_gen7_3DSTATE_DS() */ + dw[1] = kernel_offset; + dw[2] = ds->ds[0]; + dw[3] = ds->ds[1]; + dw[4] = ds->ds[2]; + dw[5] = ds->ds[3]; +} + +static inline void +gen8_3DSTATE_DS(struct ilo_builder *builder, + const struct ilo_state_ds *ds, + uint32_t kernel_offset) +{ + const uint8_t cmd_len = 9; + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 8, 8); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2); + /* see ds_set_gen7_3DSTATE_DS() */ + dw[1] = kernel_offset; dw[2] = 0; - dw[3] = 0; - dw[4] = 0; + dw[3] = ds->ds[0]; + dw[4] = ds->ds[1]; dw[5] = 0; - if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { - dw[6] = 0; - dw[7] = 0; - dw[8] = 0; - } + dw[6] = ds->ds[2]; + dw[7] = ds->ds[3]; + dw[8] = ds->ds[4]; } static inline void gen6_3DSTATE_GS(struct ilo_builder *builder, - const struct ilo_shader_state *gs) -{ - const uint8_t cmd_len = 7; - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5, dw6, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - cso = ilo_shader_get_kernel_cso(gs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - dw6 = cso->payload[3]; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); - dw[1] = ilo_shader_get_kernel_offset(gs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; - dw[6] = dw6; -} - -static inline void -gen6_so_3DSTATE_GS(struct ilo_builder *builder, - const struct ilo_shader_state *vs, - int verts_per_prim) -{ - const uint8_t cmd_len = 7; - struct ilo_shader_cso cso; - enum ilo_kernel_param param; - uint32_t dw2, dw4, dw5, dw6, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - 
assert(ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)); - - switch (verts_per_prim) { - case 1: - param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; - break; - case 2: - param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; - break; - default: - param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; - break; - } - - /* cannot use VS's CSO */ - ilo_gpe_init_gs_cso(builder->dev, vs, &cso); - dw2 = cso.payload[0]; - dw4 = cso.payload[1]; - dw5 = cso.payload[2]; - dw6 = cso.payload[3]; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); - dw[1] = ilo_shader_get_kernel_offset(vs) + - ilo_shader_get_kernel_param(vs, param); - dw[2] = dw2; - dw[3] = 0; - dw[4] = dw4; - dw[5] = dw5; - dw[6] = dw6; -} - -static inline void -gen6_disable_3DSTATE_GS(struct ilo_builder *builder) + const struct ilo_state_gs *gs, + uint32_t kernel_offset) { const uint8_t cmd_len = 7; uint32_t *dw; @@ -762,13 +711,13 @@ gen6_disable_3DSTATE_GS(struct ilo_builder *builder) ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - /* honor the valid range of URB read length */ - dw[4] = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT; - dw[5] = GEN6_GS_DW5_STATISTICS; - dw[6] = 0; + dw[1] = kernel_offset; + /* see gs_set_gen6_3DSTATE_GS() */ + dw[2] = gs->gs[0]; + dw[3] = gs->gs[1]; + dw[4] = gs->gs[2]; + dw[5] = gs->gs[3]; + dw[6] = gs->gs[4]; } static inline void @@ -797,54 +746,49 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder, static inline void gen7_3DSTATE_GS(struct ilo_builder *builder, - const struct ilo_shader_state *gs) + const struct ilo_state_gs *gs, + uint32_t kernel_offset) { const uint8_t cmd_len = 7; - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - cso = ilo_shader_get_kernel_cso(gs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - 
ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); - dw[1] = ilo_shader_get_kernel_offset(gs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; + dw[1] = kernel_offset; + /* see gs_set_gen7_3DSTATE_GS() */ + dw[2] = gs->gs[0]; + dw[3] = gs->gs[1]; + dw[4] = gs->gs[2]; + dw[5] = gs->gs[3]; dw[6] = 0; } static inline void -gen7_disable_3DSTATE_GS(struct ilo_builder *builder) +gen8_3DSTATE_GS(struct ilo_builder *builder, + const struct ilo_state_gs *gs, + uint32_t kernel_offset) { - const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 10 : 7; + const uint8_t cmd_len = 10; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 7, 8); + ILO_DEV_ASSERT(builder->dev, 8, 8); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); - dw[1] = 0; + dw[1] = kernel_offset; dw[2] = 0; - dw[3] = 0; - dw[4] = 0; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { - dw[7] = GEN8_GS_DW7_STATISTICS; - dw[8] = 0; - dw[9] = 0; - } else { - dw[5] = GEN7_GS_DW5_STATISTICS; - dw[6] = 0; - } + /* see gs_set_gen7_3DSTATE_GS() */ + dw[3] = gs->gs[0]; + dw[4] = gs->gs[1]; + dw[5] = 0; + dw[6] = gs->gs[2]; + dw[7] = gs->gs[3]; + dw[8] = 0; + dw[9] = gs->gs[4]; } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index 16db93c6603..b2087df3470 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -33,6 +33,7 @@ #include "ilo_core.h" #include "ilo_dev.h" +#include "ilo_state_shader.h" #include "ilo_state_surface.h" #include "ilo_state_zs.h" @@ -113,24 +114,24 @@ struct ilo_fb_state { enum gen_depth_format depth_offset_format; }; -struct ilo_shader_cso { - uint32_t payload[5]; +union ilo_shader_cso { + struct ilo_state_vs vs; + struct ilo_state_hs hs; + struct ilo_state_ds ds; + struct ilo_state_gs gs; + + uint32_t ps_payload[5]; + + struct 
{ + struct ilo_state_vs vs; + struct ilo_state_gs sol; + } vs_sol; }; -void -ilo_gpe_init_vs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *vs, - struct ilo_shader_cso *cso); - -void -ilo_gpe_init_gs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *gs, - struct ilo_shader_cso *cso); - void ilo_gpe_init_fs_cso(const struct ilo_dev *dev, const struct ilo_shader_state *fs, - struct ilo_shader_cso *cso); + union ilo_shader_cso *cso); void ilo_gpe_set_fb(const struct ilo_dev *dev, diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c index 932b80dd0aa..004904fcd08 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c @@ -37,7 +37,7 @@ static void fs_init_cso_gen6(const struct ilo_dev *dev, const struct ilo_shader_state *fs, - struct ilo_shader_cso *cso) + union ilo_shader_cso *cso) { int start_grf, input_count, sampler_count, max_threads; uint32_t dw2, dw4, dw5, dw6; @@ -121,11 +121,11 @@ fs_init_cso_gen6(const struct ilo_dev *dev, dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | GEN6_POSOFFSET_NONE << GEN6_WM_DW6_PS_POSOFFSET__SHIFT; - STATIC_ASSERT(Elements(cso->payload) >= 4); - cso->payload[0] = dw2; - cso->payload[1] = dw4; - cso->payload[2] = dw5; - cso->payload[3] = dw6; + STATIC_ASSERT(Elements(cso->ps_payload) >= 4); + cso->ps_payload[0] = dw2; + cso->ps_payload[1] = dw4; + cso->ps_payload[2] = dw5; + cso->ps_payload[3] = dw6; } static uint32_t @@ -191,7 +191,7 @@ fs_get_wm_gen7(const struct ilo_dev *dev, static void fs_init_cso_gen7(const struct ilo_dev *dev, const struct ilo_shader_state *fs, - struct ilo_shader_cso *cso) + union ilo_shader_cso *cso) { int start_grf, sampler_count, max_threads; uint32_t dw2, dw4, dw5; @@ -233,11 +233,11 @@ fs_init_cso_gen7(const struct ilo_dev *dev, 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT | 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT; - 
STATIC_ASSERT(Elements(cso->payload) >= 4); - cso->payload[0] = dw2; - cso->payload[1] = dw4; - cso->payload[2] = dw5; - cso->payload[3] = fs_get_wm_gen7(dev, fs); + STATIC_ASSERT(Elements(cso->ps_payload) >= 4); + cso->ps_payload[0] = dw2; + cso->ps_payload[1] = dw4; + cso->ps_payload[2] = dw5; + cso->ps_payload[3] = fs_get_wm_gen7(dev, fs); } static uint32_t @@ -267,7 +267,7 @@ fs_get_psx_gen8(const struct ilo_dev *dev, static void fs_init_cso_gen8(const struct ilo_dev *dev, const struct ilo_shader_state *fs, - struct ilo_shader_cso *cso) + union ilo_shader_cso *cso) { int start_grf, sampler_count; uint32_t dw3, dw6, dw7; @@ -293,17 +293,17 @@ fs_init_cso_gen8(const struct ilo_dev *dev, 0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT | 0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT; - STATIC_ASSERT(Elements(cso->payload) >= 4); - cso->payload[0] = dw3; - cso->payload[1] = dw6; - cso->payload[2] = dw7; - cso->payload[3] = fs_get_psx_gen8(dev, fs); + STATIC_ASSERT(Elements(cso->ps_payload) >= 4); + cso->ps_payload[0] = dw3; + cso->ps_payload[1] = dw6; + cso->ps_payload[2] = dw7; + cso->ps_payload[3] = fs_get_psx_gen8(dev, fs); } void ilo_gpe_init_fs_cso(const struct ilo_dev *dev, const struct ilo_shader_state *fs, - struct ilo_shader_cso *cso) + union ilo_shader_cso *cso) { if (ilo_dev_gen(dev) >= ILO_GEN(8)) fs_init_cso_gen8(dev, fs, cso); diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c deleted file mode 100644 index feac579f2de..00000000000 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2012-2014 LunarG, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chia-I Wu - */ - -#include "genhw/genhw.h" -#include "util/u_dual_blend.h" -#include "util/u_framebuffer.h" -#include "util/u_half.h" -#include "util/u_resource.h" - -#include "ilo_buffer.h" -#include "ilo_format.h" -#include "ilo_image.h" -#include "ilo_state_3d.h" -#include "../ilo_shader.h" - -void -ilo_gpe_init_vs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *vs, - struct ilo_shader_cso *cso) -{ - int start_grf, vue_read_len, sampler_count, max_threads; - uint32_t dw2, dw4, dw5; - - ILO_DEV_ASSERT(dev, 6, 8); - - start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG); - vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT); - sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 135: - * - * "(Vertex URB Entry Read Length) Specifies the number of pairs of - * 128-bit vertex elements to be passed into the payload for each - * vertex." - * - * "It is UNDEFINED to set this field to 0 indicating no Vertex URB - * data to be read and passed to the thread." - */ - vue_read_len = (vue_read_len + 1) / 2; - if (!vue_read_len) - vue_read_len = 1; - - max_threads = dev->thread_count; - if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2) - max_threads *= 2; - - dw2 = (true) ? 
0 : GEN6_THREADDISP_FP_MODE_ALT; - dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT | - vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | - 0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT; - - dw5 = GEN6_VS_DW5_STATISTICS | - GEN6_VS_DW5_VS_ENABLE; - - if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) - dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT; - else - dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT; - - STATIC_ASSERT(Elements(cso->payload) >= 3); - cso->payload[0] = dw2; - cso->payload[1] = dw4; - cso->payload[2] = dw5; -} - -static void -gs_init_cso_gen6(const struct ilo_dev *dev, - const struct ilo_shader_state *gs, - struct ilo_shader_cso *cso) -{ - int start_grf, vue_read_len, max_threads; - uint32_t dw2, dw4, dw5, dw6; - - ILO_DEV_ASSERT(dev, 6, 6); - - if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) { - start_grf = ilo_shader_get_kernel_param(gs, - ILO_KERNEL_URB_DATA_START_REG); - - vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); - } - else { - start_grf = ilo_shader_get_kernel_param(gs, - ILO_KERNEL_VS_GEN6_SO_START_REG); - - vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT); - } - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 153: - * - * "Specifies the amount of URB data read and passed in the thread - * payload for each Vertex URB entry, in 256-bit register increments. - * - * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to - * 0 indicating no Vertex URB data to be read and passed to the - * thread." - */ - vue_read_len = (vue_read_len + 1) / 2; - if (!vue_read_len) - vue_read_len = 1; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 154: - * - * "Maximum Number of Threads valid range is [0,27] when Rendering - * Enabled bit is set." 
- * - * From the Sandy Bridge PRM, volume 2 part 1, page 173: - * - * "Programming Note: If the GS stage is enabled, software must always - * allocate at least one GS URB Entry. This is true even if the GS - * thread never needs to output vertices to the pipeline, e.g., when - * only performing stream output. This is an artifact of the need to - * pass the GS thread an initial destination URB handle." - * - * As such, we always enable rendering, and limit the number of threads. - */ - if (dev->gt == 2) { - /* maximum is 60, but limited to 28 */ - max_threads = 28; - } - else { - /* maximum is 24, but limited to 21 (see brwCreateContext()) */ - max_threads = 21; - } - - dw2 = GEN6_THREADDISP_SPF; - - dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT | - 0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT | - start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT; - - dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT | - GEN6_GS_DW5_STATISTICS | - GEN6_GS_DW5_SO_STATISTICS | - GEN6_GS_DW5_RENDER_ENABLE; - - /* - * we cannot make use of GEN6_GS_REORDER because it will reorder - * triangle strips according to D3D rules (triangle 2N+1 uses vertices - * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices - * (2N+2, 2N+1, 2N+3)). 
- */ - dw6 = GEN6_GS_DW6_GS_ENABLE; - - if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY)) - dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY; - - if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) { - const uint32_t svbi_post_inc = - ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC); - - dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE; - if (svbi_post_inc) { - dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE | - svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT; - } - } - - STATIC_ASSERT(Elements(cso->payload) >= 4); - cso->payload[0] = dw2; - cso->payload[1] = dw4; - cso->payload[2] = dw5; - cso->payload[3] = dw6; -} - -static void -gs_init_cso_gen7(const struct ilo_dev *dev, - const struct ilo_shader_state *gs, - struct ilo_shader_cso *cso) -{ - int start_grf, vue_read_len, sampler_count, max_threads; - uint32_t dw2, dw4, dw5; - - ILO_DEV_ASSERT(dev, 7, 7.5); - - start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); - vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); - sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT); - - /* in pairs */ - vue_read_len = (vue_read_len + 1) / 2; - - switch (ilo_dev_gen(dev)) { - case ILO_GEN(7.5): - max_threads = (dev->gt >= 2) ? 256 : 70; - break; - case ILO_GEN(7): - max_threads = (dev->gt == 2) ? 128 : 36; - break; - default: - max_threads = 1; - break; - } - - dw2 = (true) ? 
0 : GEN6_THREADDISP_FP_MODE_ALT; - dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT | - GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES | - 0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT | - start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT; - - dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT | - GEN7_GS_DW5_STATISTICS | - GEN7_GS_DW5_GS_ENABLE; - - STATIC_ASSERT(Elements(cso->payload) >= 3); - cso->payload[0] = dw2; - cso->payload[1] = dw4; - cso->payload[2] = dw5; -} - -void -ilo_gpe_init_gs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *gs, - struct ilo_shader_cso *cso) -{ - if (ilo_dev_gen(dev) >= ILO_GEN(7)) - gs_init_cso_gen7(dev, gs, cso); - else - gs_init_cso_gen6(dev, gs, cso); -} diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index 550e465a329..392f784a503 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -62,6 +62,12 @@ struct ilo_blitter { uint32_t vf_data[2]; struct ilo_state_vf vf; + + struct ilo_state_vs vs; + struct ilo_state_hs hs; + struct ilo_state_ds ds; + struct ilo_state_gs gs; + struct ilo_state_sol sol; struct ilo_state_viewport vp; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 873f3e4c3e8..9cc57f86f68 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -59,6 +59,10 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter) ilo_state_vf_init_for_rectlist(&blitter->vf, blitter->ilo->dev, blitter->vf_data, sizeof(blitter->vf_data), &elem, 1); + ilo_state_vs_init_disabled(&blitter->vs, blitter->ilo->dev); + ilo_state_hs_init_disabled(&blitter->hs, blitter->ilo->dev); + ilo_state_ds_init_disabled(&blitter->ds, blitter->ilo->dev); + ilo_state_gs_init_disabled(&blitter->gs, blitter->ilo->dev); ilo_state_sol_init_disabled(&blitter->sol, 
blitter->ilo->dev, false); /** diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index d659ab588ac..2f6743c8b28 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -472,10 +472,17 @@ gen6_draw_vs(struct ilo_render *r, /* 3DSTATE_VS */ if (DIRTY(VS) || r->instruction_bo_changed) { + const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs); + const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs); + if (ilo_dev_gen(r->dev) == ILO_GEN(6)) gen6_wa_pre_3dstate_vs_toggle(r); - gen6_3DSTATE_VS(r->builder, vec->vs); + if (ilo_dev_gen(r->dev) == ILO_GEN(6) && + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) + gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset); + else + gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); } } @@ -491,14 +498,39 @@ gen6_draw_gs(struct ilo_render *r, /* 3DSTATE_GS */ if (DIRTY(GS) || DIRTY(VS) || session->prim_changed || r->instruction_bo_changed) { + const union ilo_shader_cso *cso; + uint32_t kernel_offset; + if (vec->gs) { - gen6_3DSTATE_GS(r->builder, vec->gs); - } else if (vec->vs && + cso = ilo_shader_get_kernel_cso(vec->gs); + kernel_offset = ilo_shader_get_kernel_offset(vec->gs); + + gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset); + } else if (ilo_dev_gen(r->dev) == ILO_GEN(6) && ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) { - const int verts_per_prim = u_vertices_per_prim(session->reduced_prim); - gen6_so_3DSTATE_GS(r->builder, vec->vs, verts_per_prim); + const int verts_per_prim = + u_vertices_per_prim(session->reduced_prim); + enum ilo_kernel_param param; + + switch (verts_per_prim) { + case 1: + param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; + break; + case 2: + param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; + break; + default: + param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; + break; + } + + cso = ilo_shader_get_kernel_cso(vec->vs); + kernel_offset = 
ilo_shader_get_kernel_offset(vec->vs) + + ilo_shader_get_kernel_param(vec->vs, param); + + gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset); } else { - gen6_disable_3DSTATE_GS(r->builder); + gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0); } } } @@ -792,10 +824,10 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r, gen6_wa_post_3dstate_constant_vs(r); gen6_wa_pre_3dstate_vs_toggle(r); - gen6_disable_3DSTATE_VS(r->builder); + gen6_3DSTATE_VS(r->builder, &blitter->vs, 0); gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); - gen6_disable_3DSTATE_GS(r->builder); + gen6_3DSTATE_GS(r->builder, &blitter->gs, 0); gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs); gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe); diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 3c080a52a89..04da1c41261 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -313,14 +313,14 @@ gen7_draw_vs(struct ilo_render *r, } /* 3DSTATE_VS */ - if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) { - if (emit_3dstate_vs || DIRTY(RASTERIZER)) { - gen8_3DSTATE_VS(r->builder, vec->vs, - vec->rasterizer->state.clip_plane_enable); - } - } else { - if (emit_3dstate_vs) - gen6_3DSTATE_VS(r->builder, vec->vs); + if (emit_3dstate_vs) { + const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs); + const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs); + + if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) + gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); + else + gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); } } @@ -331,8 +331,15 @@ gen7_draw_hs(struct ilo_render *r, { /* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */ if (r->hw_ctx_changed) { + const struct ilo_state_hs *hs = &vec->disabled_hs; + const uint32_t kernel_offset = 0; + gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0); - gen7_disable_3DSTATE_HS(r->builder); + + if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) + 
gen8_3DSTATE_HS(r->builder, hs, kernel_offset); + else + gen7_3DSTATE_HS(r->builder, hs, kernel_offset); } /* 3DSTATE_BINDING_TABLE_POINTERS_HS */ @@ -346,8 +353,10 @@ gen7_draw_te(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_TE */ - if (r->hw_ctx_changed) - gen7_3DSTATE_TE(r->builder); + if (r->hw_ctx_changed) { + const struct ilo_state_ds *ds = &vec->disabled_ds; + gen7_3DSTATE_TE(r->builder, ds); + } } void @@ -357,8 +366,15 @@ gen7_draw_ds(struct ilo_render *r, { /* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */ if (r->hw_ctx_changed) { + const struct ilo_state_ds *ds = &vec->disabled_ds; + const uint32_t kernel_offset = 0; + gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0); - gen7_disable_3DSTATE_DS(r->builder); + + if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) + gen8_3DSTATE_DS(r->builder, ds, kernel_offset); + else + gen7_3DSTATE_DS(r->builder, ds, kernel_offset); } /* 3DSTATE_BINDING_TABLE_POINTERS_DS */ @@ -374,8 +390,15 @@ gen7_draw_gs(struct ilo_render *r, { /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */ if (r->hw_ctx_changed) { + const struct ilo_state_gs *gs = &vec->disabled_gs; + const uint32_t kernel_offset = 0; + gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0); - gen7_disable_3DSTATE_GS(r->builder); + + if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) + gen8_3DSTATE_GS(r->builder, gs, kernel_offset); + else + gen7_3DSTATE_GS(r->builder, gs, kernel_offset); } /* 3DSTATE_BINDING_TABLE_POINTERS_GS */ @@ -663,18 +686,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r, const struct ilo_blitter *blitter) { gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0); - gen6_disable_3DSTATE_VS(r->builder); + gen6_3DSTATE_VS(r->builder, &blitter->vs, 0); gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0); - gen7_disable_3DSTATE_HS(r->builder); + gen7_3DSTATE_HS(r->builder, &blitter->hs, 0); - gen7_3DSTATE_TE(r->builder); + gen7_3DSTATE_TE(r->builder, &blitter->ds); gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0); - gen7_disable_3DSTATE_DS(r->builder); + 
gen7_3DSTATE_DS(r->builder, &blitter->ds, 0); gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); - gen7_disable_3DSTATE_GS(r->builder); + gen7_3DSTATE_GS(r->builder, &blitter->gs, 0); gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol); diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 46f39c19b15..29c2bf5f8f4 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -556,6 +556,104 @@ ilo_shader_state_search_variant(struct ilo_shader_state *state, return sh; } +static void +init_shader_urb(const struct ilo_shader *kernel, + const struct ilo_shader_state *state, + struct ilo_state_shader_urb_info *urb) +{ + urb->cv_input_attr_count = kernel->in.count; + urb->read_base = 0; + urb->read_count = kernel->in.count; + + urb->output_attr_count = kernel->out.count; + urb->user_cull_enables = 0x0; + urb->user_clip_enables = 0x0; +} + +static void +init_shader_kernel(const struct ilo_shader *kernel, + const struct ilo_shader_state *state, + struct ilo_state_shader_kernel_info *kern) +{ + kern->offset = 0; + kern->grf_start = kernel->in.start_grf; + kern->pcb_attr_count = + (kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16; + kern->scratch_size = 0; +} + +static void +init_shader_resource(const struct ilo_shader *kernel, + const struct ilo_shader_state *state, + struct ilo_state_shader_resource_info *resource) +{ + resource->sampler_count = state->info.num_samplers; + resource->surface_count = 0; + resource->has_uav = false; +} + +static void +init_vs(struct ilo_shader *kernel, + const struct ilo_shader_state *state) +{ + struct ilo_state_vs_info info; + + memset(&info, 0, sizeof(info)); + + init_shader_urb(kernel, state, &info.urb); + init_shader_kernel(kernel, state, &info.kernel); + init_shader_resource(kernel, state, &info.resource); + info.dispatch_enable = true; + info.stats_enable = true; + + if (ilo_dev_gen(state->info.dev) == ILO_GEN(6) && kernel->stream_output) { + 
struct ilo_state_gs_info gs_info; + + memset(&gs_info, 0, sizeof(gs_info)); + + gs_info.urb.cv_input_attr_count = kernel->out.count; + gs_info.urb.read_count = kernel->out.count; + gs_info.kernel.grf_start = kernel->gs_start_grf; + gs_info.sol.sol_enable = true; + gs_info.sol.stats_enable = true; + gs_info.sol.render_disable = kernel->variant.u.vs.rasterizer_discard; + gs_info.sol.svbi_post_inc = kernel->svbi_post_inc; + gs_info.sol.tristrip_reorder = GEN7_REORDER_LEADING; + gs_info.dispatch_enable = true; + gs_info.stats_enable = true; + + ilo_state_vs_init(&kernel->cso.vs_sol.vs, state->info.dev, &info); + ilo_state_gs_init(&kernel->cso.vs_sol.sol, state->info.dev, &gs_info); + } else { + ilo_state_vs_init(&kernel->cso.vs, state->info.dev, &info); + } +} + +static void +init_gs(struct ilo_shader *kernel, + const struct ilo_shader_state *state) +{ + const struct pipe_stream_output_info *so_info = &state->info.stream_output; + struct ilo_state_gs_info info; + + memset(&info, 0, sizeof(info)); + + init_shader_urb(kernel, state, &info.urb); + init_shader_kernel(kernel, state, &info.kernel); + init_shader_resource(kernel, state, &info.resource); + info.dispatch_enable = true; + info.stats_enable = true; + + if (so_info->num_outputs > 0) { + info.sol.sol_enable = true; + info.sol.stats_enable = true; + info.sol.render_disable = kernel->variant.u.gs.rasterizer_discard; + info.sol.tristrip_reorder = GEN7_REORDER_LEADING; + } + + ilo_state_gs_init(&kernel->cso.gs, state->info.dev, &info); +} + static void init_sol(struct ilo_shader *kernel, const struct ilo_dev *dev, @@ -733,10 +831,10 @@ ilo_shader_state_use_variant(struct ilo_shader_state *state, if (construct_cso) { switch (state->info.type) { case PIPE_SHADER_VERTEX: - ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso); + init_vs(sh, state); break; case PIPE_SHADER_GEOMETRY: - ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso); + init_gs(sh, state); break; case PIPE_SHADER_FRAGMENT: 
ilo_gpe_init_fs_cso(state->info.dev, state, &sh->cso); @@ -1225,7 +1323,7 @@ ilo_shader_get_kernel_param(const struct ilo_shader_state *shader, /** * Return the CSO of the selected kernel. */ -const struct ilo_shader_cso * +const union ilo_shader_cso * ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader) { const struct ilo_shader *kernel = shader->shader; diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h index 457f847bb0c..0f20877f83d 100644 --- a/src/gallium/drivers/ilo/ilo_shader.h +++ b/src/gallium/drivers/ilo/ilo_shader.h @@ -86,7 +86,6 @@ struct ilo_builder; struct ilo_rasterizer_state; struct ilo_shader_cache; struct ilo_shader_state; -struct ilo_shader_cso; struct ilo_state_sbe; struct ilo_state_sol; struct ilo_state_vector; @@ -155,7 +154,7 @@ int ilo_shader_get_kernel_param(const struct ilo_shader_state *shader, enum ilo_kernel_param param); -const struct ilo_shader_cso * +const union ilo_shader_cso * ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader); const struct pipe_stream_output_info * diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 82fd0e7df19..902b6d3d1de 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -2146,6 +2146,10 @@ ilo_state_vector_init(const struct ilo_dev *dev, vec->viewport.params.matrices = vec->viewport.matrices; vec->viewport.params.scissors = vec->viewport.scissors; + ilo_state_hs_init_disabled(&vec->disabled_hs, dev); + ilo_state_ds_init_disabled(&vec->disabled_ds, dev); + ilo_state_gs_init_disabled(&vec->disabled_gs, dev); + ilo_state_surface_init_for_null(&vec->fb.null_rt, dev); ilo_state_zs_init_for_null(&vec->fb.null_zs, dev); diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 3ee471e84f1..e4c6f281a5c 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -33,6 +33,7 @@ #include 
"core/ilo_state_raster.h" #include "core/ilo_state_sampler.h" #include "core/ilo_state_sbe.h" +#include "core/ilo_state_shader.h" #include "core/ilo_state_sol.h" #include "core/ilo_state_surface.h" #include "core/ilo_state_urb.h" @@ -269,6 +270,10 @@ struct ilo_state_vector { struct ilo_shader_state *vs; struct ilo_shader_state *gs; + struct ilo_state_hs disabled_hs; + struct ilo_state_ds disabled_ds; + struct ilo_state_gs disabled_gs; + struct ilo_so_state so; struct pipe_clip_state clip; diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h index 31f731f29cd..9c17ec0e58d 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h +++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h @@ -94,7 +94,7 @@ struct ilo_kernel_routing { struct ilo_shader { struct ilo_shader_variant variant; - struct ilo_shader_cso cso; + union ilo_shader_cso cso; struct { int semantic_names[PIPE_MAX_SHADER_INPUTS]; From 54e0a8ed5dcaaa0ef483d5960ae86f88e0bf8990 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 12 Jun 2015 15:08:02 +0800 Subject: [PATCH 631/834] ilo: add ilo_state_ps to ilo_shader_cso --- .../drivers/ilo/core/ilo_builder_3d_bottom.h | 190 +++--------- src/gallium/drivers/ilo/core/ilo_state_3d.h | 19 -- .../drivers/ilo/core/ilo_state_3d_bottom.c | 280 ------------------ src/gallium/drivers/ilo/ilo_blitter.h | 1 + .../drivers/ilo/ilo_blitter_rectlist.c | 1 + src/gallium/drivers/ilo/ilo_render_gen6.c | 11 +- src/gallium/drivers/ilo/ilo_render_gen7.c | 20 +- src/gallium/drivers/ilo/ilo_render_gen8.c | 11 +- src/gallium/drivers/ilo/ilo_render_surface.c | 1 + src/gallium/drivers/ilo/ilo_shader.c | 88 +++++- src/gallium/drivers/ilo/ilo_shader.h | 15 + src/gallium/drivers/ilo/ilo_state.c | 2 +- .../drivers/ilo/shader/ilo_shader_internal.h | 2 + 13 files changed, 160 insertions(+), 481 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h 
b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 68461fff09d..88ed6ea054c 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -29,7 +29,6 @@ #define ILO_BUILDER_3D_BOTTOM_H #include "genhw/genhw.h" -#include "../ilo_shader.h" #include "intel_winsys.h" #include "ilo_core.h" @@ -38,6 +37,7 @@ #include "ilo_state_cc.h" #include "ilo_state_raster.h" #include "ilo_state_sbe.h" +#include "ilo_state_shader.h" #include "ilo_state_viewport.h" #include "ilo_builder.h" #include "ilo_builder_3d_top.h" @@ -200,56 +200,24 @@ gen8_3DSTATE_RASTER(struct ilo_builder *builder, static inline void gen6_3DSTATE_WM(struct ilo_builder *builder, const struct ilo_state_raster *rs, - const struct ilo_shader_state *fs, - bool dual_blend, bool cc_may_kill) + const struct ilo_state_ps *ps, + uint32_t kernel_offset) { const uint8_t cmd_len = 9; - const bool multisample = false; - const int num_samples = 1; - uint32_t dw2, dw4, dw5, dw6, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 6); - dw2 = 0; - /* see raster_set_gen6_3dstate_wm() */ - dw4 = rs->raster[0]; - dw5 = rs->raster[1]; - dw6 = rs->raster[2]; - - if (fs) { - const union ilo_shader_cso *cso; - - cso = ilo_shader_get_kernel_cso(fs); - /* see fs_init_cso_gen6() */ - dw2 |= cso->ps_payload[0]; - dw4 |= cso->ps_payload[1]; - dw5 |= cso->ps_payload[2]; - dw6 |= cso->ps_payload[3]; - } else { - const int max_threads = (builder->dev->gt == 2) ? 
80 : 40; - - /* honor the valid range even if dispatching is disabled */ - dw5 |= (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; - } - - if (cc_may_kill) - dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL | GEN6_WM_DW5_PS_DISPATCH_ENABLE; - - if (dual_blend) - dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; - - if (multisample && num_samples > 1) - dw6 |= GEN6_WM_DW6_MSDISPMODE_PERPIXEL; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); - dw[1] = ilo_shader_get_kernel_offset(fs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; - dw[6] = dw6; + dw[1] = kernel_offset; + /* see raster_set_gen6_3dstate_wm() and ps_set_gen6_3dstate_wm() */ + dw[2] = ps->ps[0]; + dw[3] = ps->ps[1]; + dw[4] = rs->wm[0] | ps->ps[2]; + dw[5] = rs->wm[1] | ps->ps[3]; + dw[6] = rs->wm[2] | ps->ps[4]; dw[7] = 0; /* kernel 1 */ dw[8] = 0; /* kernel 2 */ } @@ -257,39 +225,19 @@ gen6_3DSTATE_WM(struct ilo_builder *builder, static inline void gen7_3DSTATE_WM(struct ilo_builder *builder, const struct ilo_state_raster *rs, - const struct ilo_shader_state *fs, - bool cc_may_kill) + const struct ilo_state_ps *ps) { const uint8_t cmd_len = 3; - const bool multisample = false; - const int num_samples = 1; - uint32_t dw1, dw2, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - /* see raster_set_gen8_3DSTATE_WM() */ - dw1 = rs->wm[0]; - - if (fs) { - const union ilo_shader_cso *cso; - - cso = ilo_shader_get_kernel_cso(fs); - /* see fs_init_cso_gen7() */ - dw1 |= cso->ps_payload[3]; - } - - if (cc_may_kill) - dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE | GEN7_WM_DW1_PS_KILL_PIXEL; - - dw2 = 0; - if (multisample && num_samples > 1) - dw2 |= GEN7_WM_DW2_MSDISPMODE_PERPIXEL; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); - dw[1] = dw1; - dw[2] = dw2; + /* see raster_set_gen8_3DSTATE_WM() and ps_set_gen7_3dstate_wm() */ + dw[1] = rs->wm[0] | ps->ps[0]; + dw[2] = ps->ps[1]; } 
static inline void @@ -379,100 +327,48 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder) static inline void gen7_3DSTATE_PS(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - bool dual_blend) + const struct ilo_state_ps *ps, + uint32_t kernel_offset) { const uint8_t cmd_len = 8; - const union ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - /* see fs_init_cso_gen7() */ - cso = ilo_shader_get_kernel_cso(fs); - dw2 = cso->ps_payload[0]; - dw4 = cso->ps_payload[1]; - dw5 = cso->ps_payload[2]; - - if (dual_blend) - dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); - dw[1] = ilo_shader_get_kernel_offset(fs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; + dw[1] = kernel_offset; + /* see ps_set_gen7_3DSTATE_PS() */ + dw[2] = ps->ps[2]; + dw[3] = ps->ps[3]; + dw[4] = ps->ps[4]; + dw[5] = ps->ps[5]; dw[6] = 0; /* kernel 1 */ dw[7] = 0; /* kernel 2 */ } -static inline void -gen7_disable_3DSTATE_PS(struct ilo_builder *builder) -{ - const uint8_t cmd_len = 8; - int max_threads; - uint32_t dw4, *dw; - - ILO_DEV_ASSERT(builder->dev, 7, 7.5); - - /* GPU hangs if none of the dispatch enable bits is set */ - dw4 = GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; - - /* see brwCreateContext() */ - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(7.5): - max_threads = (builder->dev->gt == 3) ? 408 : - (builder->dev->gt == 2) ? 204 : 102; - dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; - break; - case ILO_GEN(7): - default: - max_threads = (builder->dev->gt == 2) ? 
172 : 48; - dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; - break; - } - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw[4] = dw4; - dw[5] = 0; - dw[6] = 0; - dw[7] = 0; -} - static inline void gen8_3DSTATE_PS(struct ilo_builder *builder, - const struct ilo_shader_state *fs) + const struct ilo_state_ps *ps, + uint32_t kernel_offset) { const uint8_t cmd_len = 12; - const union ilo_shader_cso *cso; - uint32_t dw3, dw6, dw7, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); - /* see fs_init_cso_gen8() */ - cso = ilo_shader_get_kernel_cso(fs); - dw3 = cso->ps_payload[0]; - dw6 = cso->ps_payload[1]; - dw7 = cso->ps_payload[2]; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); - dw[1] = ilo_shader_get_kernel_offset(fs); + dw[1] = kernel_offset; dw[2] = 0; - dw[3] = dw3; - dw[4] = 0; /* scratch */ + /* see ps_set_gen8_3DSTATE_PS() */ + dw[3] = ps->ps[0]; + dw[4] = ps->ps[1]; dw[5] = 0; - dw[6] = dw6; - dw[7] = dw7; + dw[6] = ps->ps[2]; + dw[7] = ps->ps[3]; dw[8] = 0; /* kernel 1 */ dw[9] = 0; dw[10] = 0; /* kernel 2 */ @@ -481,28 +377,18 @@ gen8_3DSTATE_PS(struct ilo_builder *builder, static inline void gen8_3DSTATE_PS_EXTRA(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - bool cc_may_kill, bool per_sample) + const struct ilo_state_ps *ps) { const uint8_t cmd_len = 2; - const union ilo_shader_cso *cso; - uint32_t dw1, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); - /* see fs_init_cso_gen8() */ - cso = ilo_shader_get_kernel_cso(fs); - dw1 = cso->ps_payload[3]; - - if (cc_may_kill) - dw1 |= GEN8_PSX_DW1_VALID | GEN8_PSX_DW1_KILL_PIXEL; - if (per_sample) - dw1 |= GEN8_PSX_DW1_PER_SAMPLE; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_EXTRA) | (cmd_len - 2); - dw[1] = dw1; + /* see ps_set_gen8_3DSTATE_PS_EXTRA() */ + 
dw[1] = ps->ps[4]; } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index b2087df3470..dcc94bfc88c 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -114,25 +114,6 @@ struct ilo_fb_state { enum gen_depth_format depth_offset_format; }; -union ilo_shader_cso { - struct ilo_state_vs vs; - struct ilo_state_hs hs; - struct ilo_state_ds ds; - struct ilo_state_gs gs; - - uint32_t ps_payload[5]; - - struct { - struct ilo_state_vs vs; - struct ilo_state_gs sol; - } vs_sol; -}; - -void -ilo_gpe_init_fs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - union ilo_shader_cso *cso); - void ilo_gpe_set_fb(const struct ilo_dev *dev, const struct pipe_framebuffer_state *state, diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c index 004904fcd08..8734aff44da 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c @@ -32,286 +32,6 @@ #include "ilo_format.h" #include "ilo_image.h" #include "ilo_state_3d.h" -#include "../ilo_shader.h" - -static void -fs_init_cso_gen6(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - union ilo_shader_cso *cso) -{ - int start_grf, input_count, sampler_count, max_threads; - uint32_t dw2, dw4, dw5, dw6; - - ILO_DEV_ASSERT(dev, 6, 6); - - start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); - input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); - sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); - - /* see brwCreateContext() */ - max_threads = (dev->gt == 2) ? 80 : 40; - - dw2 = (true) ? 
0 : GEN6_THREADDISP_FP_MODE_ALT; - dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT | - 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT | - 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT; - - dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 275: - * - * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the - * PS kernel or color calculator has the ability to kill (discard) - * pixels or samples, other than due to depth or stencil testing. - * This bit is required to be ENABLED in the following situations: - * - * The API pixel shader program contains "killpix" or "discard" - * instructions, or other code in the pixel shader kernel that can - * cause the final pixel mask to differ from the pixel mask received - * on dispatch. - * - * A sampler with chroma key enabled with kill pixel mode is used by - * the pixel shader. - * - * Any render target has Alpha Test Enable or AlphaToCoverage Enable - * enabled. - * - * The pixel shader kernel generates and outputs oMask. - * - * Note: As ClipDistance clipping is fully supported in hardware and - * therefore not via PS instructions, there should be no need to - * ENABLE this bit due to ClipDistance clipping." - */ - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 275: - * - * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth - * field must be set to disabled." - * - * TODO This is not checked yet. 
- */ - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - dw5 |= GEN6_WM_DW5_PS_USE_DEPTH; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - dw5 |= GEN6_WM_DW5_PS_USE_W; - - /* - * TODO set this bit only when - * - * a) fs writes colors and color is not masked, or - * b) fs writes depth, or - * c) fs or cc kills - */ - if (true) - dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; - - assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT; - - dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | - GEN6_POSOFFSET_NONE << GEN6_WM_DW6_PS_POSOFFSET__SHIFT; - - STATIC_ASSERT(Elements(cso->ps_payload) >= 4); - cso->ps_payload[0] = dw2; - cso->ps_payload[1] = dw4; - cso->ps_payload[2] = dw5; - cso->ps_payload[3] = dw6; -} - -static uint32_t -fs_get_wm_gen7(const struct ilo_dev *dev, - const struct ilo_shader_state *fs) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 7, 7.5); - - dw = 0; - - /* - * TODO set this bit only when - * - * a) fs writes colors and color is not masked, or - * b) fs writes depth, or - * c) fs or cc kills - */ - dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE; - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 278: - * - * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that - * the PS kernel or color calculator has the ability to kill - * (discard) pixels or samples, other than due to depth or stencil - * testing. This bit is required to be ENABLED in the following - * situations: - * - * - The API pixel shader program contains "killpix" or "discard" - * instructions, or other code in the pixel shader kernel that - * can cause the final pixel mask to differ from the pixel mask - * received on dispatch. - * - * - A sampler with chroma key enabled with kill pixel mode is used - * by the pixel shader. 
- * - * - Any render target has Alpha Test Enable or AlphaToCoverage - * Enable enabled. - * - * - The pixel shader kernel generates and outputs oMask. - * - * Note: As ClipDistance clipping is fully supported in hardware - * and therefore not via PS instructions, there should be no need - * to ENABLE this bit due to ClipDistance clipping." - */ - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - dw |= GEN7_WM_DW1_PS_KILL_PIXEL; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw |= GEN7_PSCDEPTH_ON << GEN7_WM_DW1_PSCDEPTH__SHIFT; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - dw |= GEN7_WM_DW1_PS_USE_DEPTH; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - dw |= GEN7_WM_DW1_PS_USE_W; - - return dw; -} - -static void -fs_init_cso_gen7(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - union ilo_shader_cso *cso) -{ - int start_grf, sampler_count, max_threads; - uint32_t dw2, dw4, dw5; - - ILO_DEV_ASSERT(dev, 7, 7.5); - - start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); - sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); - - dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; - dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - dw4 = GEN6_POSOFFSET_NONE << GEN7_PS_DW4_POSOFFSET__SHIFT; - - /* see brwCreateContext() */ - switch (ilo_dev_gen(dev)) { - case ILO_GEN(7.5): - max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102; - dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; - dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; - break; - case ILO_GEN(7): - default: - max_threads = (dev->gt == 2) ? 
172 : 48; - dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; - break; - } - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) - dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT)) - dw4 |= GEN7_PS_DW4_ATTR_ENABLE; - - assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; - - dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT | - 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT | - 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT; - - STATIC_ASSERT(Elements(cso->ps_payload) >= 4); - cso->ps_payload[0] = dw2; - cso->ps_payload[1] = dw4; - cso->ps_payload[2] = dw5; - cso->ps_payload[3] = fs_get_wm_gen7(dev, fs); -} - -static uint32_t -fs_get_psx_gen8(const struct ilo_dev *dev, - const struct ilo_shader_state *fs) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - dw = GEN8_PSX_DW1_VALID; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - dw |= GEN8_PSX_DW1_KILL_PIXEL; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw |= GEN7_PSCDEPTH_ON << GEN8_PSX_DW1_PSCDEPTH__SHIFT; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - dw |= GEN8_PSX_DW1_USE_DEPTH; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - dw |= GEN8_PSX_DW1_USE_W; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT)) - dw |= GEN8_PSX_DW1_ATTR_ENABLE; - - return dw; -} - -static void -fs_init_cso_gen8(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - union ilo_shader_cso *cso) -{ - int start_grf, sampler_count; - uint32_t dw3, dw6, dw7; - - ILO_DEV_ASSERT(dev, 8, 8); - - start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); - sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); - - dw3 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; - dw3 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - /* always 64? 
*/ - dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT | - GEN6_POSOFFSET_NONE << GEN8_PS_DW6_POSOFFSET__SHIFT; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) - dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE; - - assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT; - - dw7 = start_grf << GEN8_PS_DW7_URB_GRF_START0__SHIFT | - 0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT | - 0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT; - - STATIC_ASSERT(Elements(cso->ps_payload) >= 4); - cso->ps_payload[0] = dw3; - cso->ps_payload[1] = dw6; - cso->ps_payload[2] = dw7; - cso->ps_payload[3] = fs_get_psx_gen8(dev, fs); -} - -void -ilo_gpe_init_fs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - union ilo_shader_cso *cso) -{ - if (ilo_dev_gen(dev) >= ILO_GEN(8)) - fs_init_cso_gen8(dev, fs, cso); - else if (ilo_dev_gen(dev) >= ILO_GEN(7)) - fs_init_cso_gen7(dev, fs, cso); - else - fs_init_cso_gen6(dev, fs, cso); -} static void fb_set_blend_caps(const struct ilo_dev *dev, diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index 392f784a503..08690f30378 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -74,6 +74,7 @@ struct ilo_blitter { uint32_t vp_data[20]; struct ilo_state_sbe sbe; + struct ilo_state_ps ps; struct ilo_state_cc cc; uint32_t depth_clear_value; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 9cc57f86f68..a4c8dead4a5 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -78,6 +78,7 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter) blitter->vp_data, sizeof(blitter->vp_data)); ilo_state_sbe_init_for_rectlist(&blitter->sbe, blitter->ilo->dev, 0, 0); + ilo_state_ps_init_disabled(&blitter->ps, blitter->ilo->dev); 
ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev, ilo_state_vf_get_attr_count(&blitter->vf)); diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 2f6743c8b28..30abead0cdc 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -666,14 +666,17 @@ gen6_draw_wm(struct ilo_render *r, } /* 3DSTATE_WM */ - if (DIRTY(FS) || DIRTY(BLEND) || + if (DIRTY(FS) || (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) || r->instruction_bo_changed) { + const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs); + const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs); + if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed) gen6_wa_pre_3dstate_wm_max_threads(r); - gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs, vec->fs, - vec->blend->dual_blend, vec->blend->alpha_may_kill); + gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs, + &cso->ps, kernel_offset); } } @@ -840,7 +843,7 @@ gen6_rectlist_wm(struct ilo_render *r, gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen6_wa_pre_3dstate_wm_max_threads(r); - gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, NULL, false, false); + gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 04da1c41261..0b2245c80da 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -499,12 +499,12 @@ gen7_draw_wm(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { + const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs); + const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs); + /* 3DSTATE_WM */ - if (DIRTY(FS) || DIRTY(BLEND) || - (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM)) { - gen7_3DSTATE_WM(r->builder, &vec->rasterizer->rs, vec->fs, - 
vec->blend->alpha_may_kill); - } + if (DIRTY(FS) || (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM)) + gen7_3DSTATE_WM(r->builder, &vec->rasterizer->rs, &cso->ps); /* 3DSTATE_BINDING_TABLE_POINTERS_PS */ if (session->binding_table_fs_changed) { @@ -527,13 +527,11 @@ gen7_draw_wm(struct ilo_render *r, } /* 3DSTATE_PS */ - if (DIRTY(FS) || DIRTY(BLEND) || r->instruction_bo_changed) { - const bool dual_blend = vec->blend->dual_blend; - + if (DIRTY(FS) || r->instruction_bo_changed) { if (r->hw_ctx_changed) gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_3DSTATE_PS(r->builder, vec->fs, dual_blend); + gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset); } /* 3DSTATE_SCISSOR_STATE_POINTERS */ @@ -714,12 +712,12 @@ static void gen7_rectlist_wm(struct ilo_render *r, const struct ilo_blitter *blitter) { - gen7_3DSTATE_WM(r->builder, &blitter->fb.rs, NULL, false); + gen7_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps); gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_disable_3DSTATE_PS(r->builder); + gen7_3DSTATE_PS(r->builder, &blitter->ps, 0); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 691c378c864..2ce71fb161e 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -86,6 +86,9 @@ gen8_draw_wm(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { + const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs); + const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs); + /* 3DSTATE_WM */ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) gen8_3DSTATE_WM(r->builder, &vec->rasterizer->rs); @@ -121,13 +124,11 @@ gen8_draw_wm(struct ilo_render *r, /* 3DSTATE_PS */ if (DIRTY(FS) || r->instruction_bo_changed) - gen8_3DSTATE_PS(r->builder, vec->fs); + gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset); /* 3DSTATE_PS_EXTRA 
*/ - if (DIRTY(FS) || DIRTY(BLEND)) { - gen8_3DSTATE_PS_EXTRA(r->builder, vec->fs, - vec->blend->alpha_may_kill, false); - } + if (DIRTY(FS)) + gen8_3DSTATE_PS_EXTRA(r->builder, &cso->ps); /* 3DSTATE_PS_BLEND */ if (session->cc_delta.dirty & ILO_STATE_CC_3DSTATE_PS_BLEND) diff --git a/src/gallium/drivers/ilo/ilo_render_surface.c b/src/gallium/drivers/ilo/ilo_render_surface.c index 729cb604eaf..bbdd5fe7a0a 100644 --- a/src/gallium/drivers/ilo/ilo_render_surface.c +++ b/src/gallium/drivers/ilo/ilo_render_surface.c @@ -29,6 +29,7 @@ #include "ilo_common.h" #include "ilo_blitter.h" +#include "ilo_shader.h" #include "ilo_state.h" #include "ilo_render_gen.h" diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 29c2bf5f8f4..93a26268a29 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -27,7 +27,6 @@ #include "genhw/genhw.h" /* for SBE setup */ #include "core/ilo_builder.h" -#include "core/ilo_state_3d.h" #include "core/intel_winsys.h" #include "shader/ilo_shader_internal.h" #include "tgsi/tgsi_parse.h" @@ -654,6 +653,60 @@ init_gs(struct ilo_shader *kernel, ilo_state_gs_init(&kernel->cso.gs, state->info.dev, &info); } +static void +init_ps(struct ilo_shader *kernel, + const struct ilo_shader_state *state) +{ + struct ilo_state_ps_info info; + + memset(&info, 0, sizeof(info)); + + init_shader_kernel(kernel, state, &info.kernel_8); + init_shader_resource(kernel, state, &info.resource); + + info.io.has_rt_write = true; + info.io.posoffset = GEN6_POSOFFSET_NONE; + info.io.attr_count = kernel->in.count; + info.io.use_z = kernel->in.has_pos; + info.io.use_w = kernel->in.has_pos; + info.io.use_coverage_mask = false; + info.io.pscdepth = (kernel->out.has_pos) ? 
+ GEN7_PSCDEPTH_ON : GEN7_PSCDEPTH_OFF; + info.io.write_pixel_mask = kernel->has_kill; + info.io.write_omask = false; + + info.params.sample_mask = 0x1; + info.params.earlyz_control_psexec = false; + info.params.alpha_may_kill = false; + info.params.dual_source_blending = false; + info.params.has_writeable_rt = true; + + info.valid_kernels = GEN6_PS_DISPATCH_8; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 284: + * + * "(MSDISPMODE_PERSAMPLE) This is the high-quality multisample mode + * where (over and above PERPIXEL mode) the PS is run for each covered + * sample. This mode is also used for "normal" non-multisample + * rendering (aka 1X), given Number of Multisamples is programmed to + * NUMSAMPLES_1." + */ + info.per_sample_dispatch = true; + + info.rt_clear_enable = false; + info.rt_resolve_enable = false; + info.cv_per_sample_interp = false; + info.cv_has_earlyz_op = false; + info.sample_count_one = true; + info.cv_has_depth_buffer = true; + + ilo_state_ps_init(&kernel->cso.ps, state->info.dev, &info); + + /* remember current parameters */ + kernel->ps_params = info.params; +} + static void init_sol(struct ilo_shader *kernel, const struct ilo_dev *dev, @@ -837,7 +890,7 @@ ilo_shader_state_use_variant(struct ilo_shader_state *state, init_gs(sh, state); break; case PIPE_SHADER_FRAGMENT: - ilo_gpe_init_fs_cso(state->info.dev, state, &sh->cso); + init_ps(sh, state); break; default: break; @@ -955,16 +1008,33 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader, const struct ilo_state_vector *vec, uint32_t dirty) { - const struct ilo_shader * const cur = shader->shader; struct ilo_shader_variant variant; + bool changed = false; - if (!(shader->info.non_orthogonal_states & dirty)) - return false; + if (shader->info.non_orthogonal_states & dirty) { + const struct ilo_shader * const old = shader->shader; - ilo_shader_variant_init(&variant, &shader->info, vec); - ilo_shader_state_use_variant(shader, &variant); + ilo_shader_variant_init(&variant, 
&shader->info, vec); + ilo_shader_state_use_variant(shader, &variant); + changed = (shader->shader != old); + } - return (shader->shader != cur); + if (shader->info.type == PIPE_SHADER_FRAGMENT) { + struct ilo_shader *kernel = shader->shader; + + if (kernel->ps_params.sample_mask != vec->sample_mask || + kernel->ps_params.alpha_may_kill != vec->blend->alpha_may_kill) { + kernel->ps_params.sample_mask = vec->sample_mask; + kernel->ps_params.alpha_may_kill = vec->blend->alpha_may_kill; + + ilo_state_ps_set_params(&kernel->cso.ps, shader->info.dev, + &kernel->ps_params); + + changed = true; + } + } + + return changed; } static int @@ -1063,8 +1133,8 @@ ilo_shader_select_kernel_sbe(struct ilo_shader_state *shader, assert(kernel->in.count <= Elements(swizzles)); dst_len = MIN2(kernel->in.count, Elements(swizzles)); - memset(&info, 0, sizeof(info)); memset(&swizzles, 0, sizeof(swizzles)); + memset(&info, 0, sizeof(info)); info.attr_count = dst_len; info.cv_vue_attr_count = src_skip + src_len; diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h index 0f20877f83d..d9f02a4746a 100644 --- a/src/gallium/drivers/ilo/ilo_shader.h +++ b/src/gallium/drivers/ilo/ilo_shader.h @@ -28,6 +28,8 @@ #ifndef ILO_SHADER_H #define ILO_SHADER_H +#include "core/ilo_state_shader.h" + #include "ilo_common.h" enum ilo_kernel_param { @@ -90,6 +92,19 @@ struct ilo_state_sbe; struct ilo_state_sol; struct ilo_state_vector; +union ilo_shader_cso { + struct ilo_state_vs vs; + struct ilo_state_hs hs; + struct ilo_state_ds ds; + struct ilo_state_gs gs; + struct ilo_state_ps ps; + + struct { + struct ilo_state_vs vs; + struct ilo_state_gs sol; + } vs_sol; +}; + struct ilo_shader_cache * ilo_shader_cache_create(void); diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 902b6d3d1de..917839fa23e 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -698,6 +698,7 @@ 
ilo_finalize_3d_states(struct ilo_context *ilo, { ilo->state_vector.draw = draw; + finalize_blend(ilo); finalize_shader_states(&ilo->state_vector); finalize_constant_buffers(ilo); finalize_index_buffer(ilo); @@ -706,7 +707,6 @@ ilo_finalize_3d_states(struct ilo_context *ilo, finalize_urb(ilo); finalize_rasterizer(ilo); finalize_viewport(ilo); - finalize_blend(ilo); u_upload_unmap(ilo->uploader); } diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h index 9c17ec0e58d..01c86675202 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h +++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h @@ -77,6 +77,7 @@ struct ilo_shader_variant { struct ilo_kernel_routing { bool initialized; + bool is_point; bool light_twoside; uint32_t sprite_coord_enable; @@ -140,6 +141,7 @@ struct ilo_shader { int kernel_size; struct ilo_kernel_routing routing; + struct ilo_state_ps_params_info ps_params; /* what does the push constant buffer consist of? */ struct { From 117926debb72e5027faae885f9aa7f1ca61f6a9c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 11 Jun 2015 07:36:28 +0800 Subject: [PATCH 632/834] ilo: merge ilo_state_3d*.[ch] to ilo_state.[ch] With most code replaced to ilo_state_*, what was left did not belong there anymore. 
--- src/gallium/drivers/ilo/Makefile.sources | 2 - .../drivers/ilo/core/ilo_builder_3d_top.h | 2 +- src/gallium/drivers/ilo/core/ilo_state_3d.h | 122 --------------- .../drivers/ilo/core/ilo_state_3d_bottom.c | 147 ------------------ .../drivers/ilo/ilo_blitter_rectlist.c | 1 - src/gallium/drivers/ilo/ilo_state.c | 107 ++++++++++++- src/gallium/drivers/ilo/ilo_state.h | 74 ++++++++- 7 files changed, 179 insertions(+), 276 deletions(-) delete mode 100644 src/gallium/drivers/ilo/core/ilo_state_3d.h delete mode 100644 src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index e5a8ed4a97b..68870f44337 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -20,8 +20,6 @@ C_SOURCES := \ core/ilo_fence.h \ core/ilo_image.c \ core/ilo_image.h \ - core/ilo_state_3d.h \ - core/ilo_state_3d_bottom.c \ core/ilo_state_cc.c \ core/ilo_state_cc.h \ core/ilo_state_raster.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 85b0da7406d..bfd94344103 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -30,11 +30,11 @@ #include "genhw/genhw.h" #include "../ilo_resource.h" +#include "../ilo_state.h" #include "intel_winsys.h" #include "ilo_core.h" #include "ilo_dev.h" -#include "ilo_state_3d.h" #include "ilo_state_sampler.h" #include "ilo_state_shader.h" #include "ilo_state_sol.h" diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h deleted file mode 100644 index dcc94bfc88c..00000000000 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2012-2014 LunarG, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chia-I Wu - */ - -#ifndef ILO_STATE_3D_H -#define ILO_STATE_3D_H - -#include "genhw/genhw.h" -#include "pipe/p_state.h" - -#include "ilo_core.h" -#include "ilo_dev.h" -#include "ilo_state_shader.h" -#include "ilo_state_surface.h" -#include "ilo_state_zs.h" - -/** - * \see brw_context.h - */ -#define ILO_MAX_DRAW_BUFFERS 8 -#define ILO_MAX_CONST_BUFFERS (1 + 12) -#define ILO_MAX_SAMPLER_VIEWS 16 -#define ILO_MAX_SAMPLERS 16 -#define ILO_MAX_SO_BINDINGS 64 -#define ILO_MAX_SO_BUFFERS 4 -#define ILO_MAX_VIEWPORTS 1 - -#define ILO_MAX_SURFACES 256 - -struct intel_bo; -struct ilo_buffer; -struct ilo_image; -struct ilo_shader_state; - -struct ilo_vb_state { - struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS]; - uint32_t enabled_mask; -}; - -struct ilo_ib_state { - struct pipe_resource *buffer; - const void *user_buffer; - unsigned offset; - unsigned index_size; - - /* these are not valid until the state is finalized */ - struct pipe_resource *hw_resource; - unsigned hw_index_size; - /* an offset to be added to pipe_draw_info::start */ - int64_t draw_start_offset; -}; - -struct ilo_so_state { - struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS]; - unsigned count; - unsigned append_bitmask; - - bool enabled; -}; - -struct ilo_surface_cso { - struct pipe_surface base; - - bool is_rt; - union { - struct ilo_state_surface rt; - struct ilo_state_zs zs; - } u; -}; - -struct ilo_fb_state { - struct pipe_framebuffer_state state; - - struct ilo_state_surface null_rt; - struct ilo_state_zs null_zs; - - struct ilo_fb_blend_caps { - bool is_unorm; - bool is_integer; - bool force_dst_alpha_one; - - bool can_logicop; - bool can_blend; - bool can_alpha_test; - } blend_caps[PIPE_MAX_COLOR_BUFS]; - - unsigned num_samples; - - bool has_integer_rt; - bool has_hiz; - enum gen_depth_format depth_offset_format; -}; - -void -ilo_gpe_set_fb(const struct ilo_dev *dev, - const struct pipe_framebuffer_state *state, - struct ilo_fb_state *fb); - -#endif /* 
ILO_STATE_3D_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c deleted file mode 100644 index 8734aff44da..00000000000 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2012-2014 LunarG, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chia-I Wu - */ - -#include "genhw/genhw.h" -#include "util/u_framebuffer.h" -#include "util/u_half.h" - -#include "ilo_format.h" -#include "ilo_image.h" -#include "ilo_state_3d.h" - -static void -fb_set_blend_caps(const struct ilo_dev *dev, - enum pipe_format format, - struct ilo_fb_blend_caps *caps) -{ - const struct util_format_description *desc = - util_format_description(format); - const int ch = util_format_get_first_non_void_channel(format); - - memset(caps, 0, sizeof(*caps)); - - if (format == PIPE_FORMAT_NONE || desc->is_mixed) - return; - - caps->is_unorm = (ch >= 0 && desc->channel[ch].normalized && - desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED && - desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB); - caps->is_integer = util_format_is_pure_integer(format); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 365: - * - * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB - * variants), otherwise Logic Ops must be DISABLED." - * - * According to the classic driver, this is lifted on Gen8+. - */ - caps->can_logicop = (ilo_dev_gen(dev) >= ILO_GEN(8) || caps->is_unorm); - - /* no blending for pure integer formats */ - caps->can_blend = !caps->is_integer; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 382: - * - * "Alpha Test can only be enabled if Pixel Shader outputs a float - * alpha value." 
- */ - caps->can_alpha_test = !caps->is_integer; - - caps->force_dst_alpha_one = - (ilo_format_translate_render(dev, format) != - ilo_format_translate_color(dev, format)); - - /* sanity check */ - if (caps->force_dst_alpha_one) { - enum pipe_format render_format; - - switch (format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - render_format = PIPE_FORMAT_B8G8R8A8_UNORM; - break; - default: - render_format = PIPE_FORMAT_NONE; - break; - } - - assert(ilo_format_translate_render(dev, format) == - ilo_format_translate_color(dev, render_format)); - } -} - -void -ilo_gpe_set_fb(const struct ilo_dev *dev, - const struct pipe_framebuffer_state *state, - struct ilo_fb_state *fb) -{ - const struct pipe_surface *first_surf = NULL; - int i; - - ILO_DEV_ASSERT(dev, 6, 8); - - util_copy_framebuffer_state(&fb->state, state); - - fb->has_integer_rt = false; - for (i = 0; i < state->nr_cbufs; i++) { - if (state->cbufs[i]) { - fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]); - - fb->has_integer_rt |= fb->blend_caps[i].is_integer; - - if (!first_surf) - first_surf = state->cbufs[i]; - } else { - fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]); - } - } - - if (!first_surf && state->zsbuf) - first_surf = state->zsbuf; - - fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1; - if (!fb->num_samples) - fb->num_samples = 1; - - if (state->zsbuf) { - const struct ilo_surface_cso *cso = - (const struct ilo_surface_cso *) state->zsbuf; - - fb->has_hiz = cso->u.zs.hiz_bo; - fb->depth_offset_format = - ilo_state_zs_get_depth_format(&cso->u.zs, dev); - } else { - fb->has_hiz = false; - fb->depth_offset_format = GEN6_ZFORMAT_D32_FLOAT; - } - - /* - * The PRMs list several restrictions when the framebuffer has more than - * one surface. It seems they are actually lifted on GEN6+. 
- */ -} diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index a4c8dead4a5..afdb0377824 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -25,7 +25,6 @@ * Chia-I Wu */ -#include "core/ilo_state_3d.h" #include "util/u_draw.h" #include "util/u_pack_color.h" diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 917839fa23e..0145fcbb8d5 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -27,9 +27,9 @@ #include "core/ilo_builder_3d.h" /* for gen6_3d_translate_pipe_prim() */ #include "core/ilo_format.h" -#include "core/ilo_state_3d.h" #include "util/u_dual_blend.h" #include "util/u_dynarray.h" +#include "util/u_framebuffer.h" #include "util/u_helpers.h" #include "util/u_resource.h" #include "util/u_upload_mgr.h" @@ -1491,14 +1491,117 @@ ilo_set_constant_buffer(struct pipe_context *pipe, vec->dirty |= ILO_DIRTY_CBUF; } +static void +fb_set_blend_caps(const struct ilo_dev *dev, + enum pipe_format format, + struct ilo_fb_blend_caps *caps) +{ + const struct util_format_description *desc = + util_format_description(format); + const int ch = util_format_get_first_non_void_channel(format); + + memset(caps, 0, sizeof(*caps)); + + if (format == PIPE_FORMAT_NONE || desc->is_mixed) + return; + + caps->is_unorm = (ch >= 0 && desc->channel[ch].normalized && + desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED && + desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB); + caps->is_integer = util_format_is_pure_integer(format); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 365: + * + * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB + * variants), otherwise Logic Ops must be DISABLED." + * + * According to the classic driver, this is lifted on Gen8+. 
+ */ + caps->can_logicop = (ilo_dev_gen(dev) >= ILO_GEN(8) || caps->is_unorm); + + /* no blending for pure integer formats */ + caps->can_blend = !caps->is_integer; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 382: + * + * "Alpha Test can only be enabled if Pixel Shader outputs a float + * alpha value." + */ + caps->can_alpha_test = !caps->is_integer; + + caps->force_dst_alpha_one = + (ilo_format_translate_render(dev, format) != + ilo_format_translate_color(dev, format)); + + /* sanity check */ + if (caps->force_dst_alpha_one) { + enum pipe_format render_format; + + switch (format) { + case PIPE_FORMAT_B8G8R8X8_UNORM: + render_format = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + default: + render_format = PIPE_FORMAT_NONE; + break; + } + + assert(ilo_format_translate_render(dev, format) == + ilo_format_translate_color(dev, render_format)); + } +} + static void ilo_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *state) { const struct ilo_dev *dev = ilo_context(pipe)->dev; struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector; + struct ilo_fb_state *fb = &vec->fb; + const struct pipe_surface *first_surf = NULL; + int i; - ilo_gpe_set_fb(dev, state, &vec->fb); + util_copy_framebuffer_state(&fb->state, state); + + fb->has_integer_rt = false; + for (i = 0; i < state->nr_cbufs; i++) { + if (state->cbufs[i]) { + fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]); + + fb->has_integer_rt |= fb->blend_caps[i].is_integer; + + if (!first_surf) + first_surf = state->cbufs[i]; + } else { + fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]); + } + } + + if (!first_surf && state->zsbuf) + first_surf = state->zsbuf; + + fb->num_samples = (first_surf) ? 
first_surf->texture->nr_samples : 1; + if (!fb->num_samples) + fb->num_samples = 1; + + if (state->zsbuf) { + const struct ilo_surface_cso *cso = + (const struct ilo_surface_cso *) state->zsbuf; + + fb->has_hiz = cso->u.zs.hiz_bo; + fb->depth_offset_format = + ilo_state_zs_get_depth_format(&cso->u.zs, dev); + } else { + fb->has_hiz = false; + fb->depth_offset_format = GEN6_ZFORMAT_D32_FLOAT; + } + + /* + * The PRMs list several restrictions when the framebuffer has more than + * one surface. It seems they are actually lifted on GEN6+. + */ vec->dirty |= ILO_DIRTY_FB; } diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index e4c6f281a5c..90514d52224 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -28,7 +28,6 @@ #ifndef ILO_STATE_H #define ILO_STATE_H -#include "core/ilo_state_3d.h" #include "core/ilo_state_cc.h" #include "core/ilo_state_raster.h" #include "core/ilo_state_sampler.h" @@ -45,6 +44,19 @@ #include "ilo_common.h" +/** + * \see brw_context.h + */ +#define ILO_MAX_DRAW_BUFFERS 8 +#define ILO_MAX_CONST_BUFFERS (1 + 12) +#define ILO_MAX_SAMPLER_VIEWS 16 +#define ILO_MAX_SAMPLERS 16 +#define ILO_MAX_SO_BINDINGS 64 +#define ILO_MAX_SO_BUFFERS 4 +#define ILO_MAX_VIEWPORTS 1 + +#define ILO_MAX_SURFACES 256 + /** * States that we track. 
* @@ -131,6 +143,7 @@ enum ilo_dirty_flags { }; struct ilo_context; +struct ilo_shader_state; struct ilo_ve_state { unsigned vb_mapping[PIPE_MAX_ATTRIBS]; @@ -143,6 +156,24 @@ struct ilo_ve_state { struct ilo_state_vf vf; }; +struct ilo_vb_state { + struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS]; + uint32_t enabled_mask; +}; + +struct ilo_ib_state { + struct pipe_resource *buffer; + const void *user_buffer; + unsigned offset; + unsigned index_size; + + /* these are not valid until the state is finalized */ + struct pipe_resource *hw_resource; + unsigned hw_index_size; + /* an offset to be added to pipe_draw_info::start */ + int64_t draw_start_offset; +}; + struct ilo_cbuf_cso { struct pipe_resource *resource; struct ilo_state_surface_buffer_info info; @@ -188,6 +219,14 @@ struct ilo_view_state { unsigned count; }; +struct ilo_so_state { + struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS]; + unsigned count; + unsigned append_bitmask; + + bool enabled; +}; + struct ilo_rasterizer_state { struct pipe_rasterizer_state state; @@ -208,6 +247,39 @@ struct ilo_viewport_state { uint32_t vp_data[20 * ILO_MAX_VIEWPORTS]; }; +struct ilo_surface_cso { + struct pipe_surface base; + + bool is_rt; + union { + struct ilo_state_surface rt; + struct ilo_state_zs zs; + } u; +}; + +struct ilo_fb_state { + struct pipe_framebuffer_state state; + + struct ilo_state_surface null_rt; + struct ilo_state_zs null_zs; + + struct ilo_fb_blend_caps { + bool is_unorm; + bool is_integer; + bool force_dst_alpha_one; + + bool can_logicop; + bool can_blend; + bool can_alpha_test; + } blend_caps[PIPE_MAX_COLOR_BUFS]; + + unsigned num_samples; + + bool has_integer_rt; + bool has_hiz; + enum gen_depth_format depth_offset_format; +}; + struct ilo_dsa_state { struct ilo_state_cc_depth_info depth; From 790510808e614ee6c5f55ba773734838041902cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 5 Jun 2015 19:09:21 +0200 Subject: [PATCH 633/834] r600g: handle TGSI 
input/output array declarations correctly Most of this code could be removed if r600g used tgsi_shader_info. --- src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/r600/r600_shader.c | 176 +++++++++++++------------ 2 files changed, 90 insertions(+), 88 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 93a6e556b16..e122b607b86 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -476,6 +476,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_SUBROUTINES: return 0; case PIPE_SHADER_CAP_INTEGERS: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: @@ -495,7 +496,6 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: - case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; } return 0; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 2e83143b29d..07da1676182 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -617,98 +617,100 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) switch (d->Declaration.File) { case TGSI_FILE_INPUT: - i = ctx->shader->ninput; - assert(i < Elements(ctx->shader->input)); + for (j = 0; j < count; j++) { + i = ctx->shader->ninput + j; + assert(i < Elements(ctx->shader->input)); + ctx->shader->input[i].name = d->Semantic.Name; + ctx->shader->input[i].sid = d->Semantic.Index + j; + ctx->shader->input[i].interpolate = d->Interp.Interpolate; + ctx->shader->input[i].interpolate_location = d->Interp.Location; + ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j; + if (ctx->type == 
TGSI_PROCESSOR_FRAGMENT) { + ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); + switch (ctx->shader->input[i].name) { + case TGSI_SEMANTIC_FACE: + if (ctx->face_gpr != -1) + ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */ + else + ctx->face_gpr = ctx->shader->input[i].gpr; + break; + case TGSI_SEMANTIC_COLOR: + ctx->colors_used++; + break; + case TGSI_SEMANTIC_POSITION: + ctx->fragcoord_input = i; + break; + case TGSI_SEMANTIC_PRIMID: + /* set this for now */ + ctx->shader->gs_prim_id_input = true; + ctx->shader->ps_prim_id_input = i; + break; + } + if (ctx->bc->chip_class >= EVERGREEN) { + if ((r = evergreen_interp_input(ctx, i))) + return r; + } + } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { + /* FIXME probably skip inputs if they aren't passed in the ring */ + ctx->shader->input[i].ring_offset = ctx->next_ring_offset; + ctx->next_ring_offset += 16; + if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID) + ctx->shader->gs_prim_id_input = true; + } + } ctx->shader->ninput += count; - ctx->shader->input[i].name = d->Semantic.Name; - ctx->shader->input[i].sid = d->Semantic.Index; - ctx->shader->input[i].interpolate = d->Interp.Interpolate; - ctx->shader->input[i].interpolate_location = d->Interp.Location; - ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); - switch (ctx->shader->input[i].name) { - case TGSI_SEMANTIC_FACE: - if (ctx->face_gpr != -1) - ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */ - else - ctx->face_gpr = ctx->shader->input[i].gpr; - break; - case TGSI_SEMANTIC_COLOR: - ctx->colors_used++; - break; - case TGSI_SEMANTIC_POSITION: - ctx->fragcoord_input = i; - break; - case TGSI_SEMANTIC_PRIMID: - /* set this for now */ - ctx->shader->gs_prim_id_input = true; - 
ctx->shader->ps_prim_id_input = i; - break; - } - if (ctx->bc->chip_class >= EVERGREEN) { - if ((r = evergreen_interp_input(ctx, i))) - return r; - } - } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { - /* FIXME probably skip inputs if they aren't passed in the ring */ - ctx->shader->input[i].ring_offset = ctx->next_ring_offset; - ctx->next_ring_offset += 16; - if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID) - ctx->shader->gs_prim_id_input = true; - } - for (j = 1; j < count; ++j) { - ctx->shader->input[i + j] = ctx->shader->input[i]; - ctx->shader->input[i + j].gpr += j; - } break; case TGSI_FILE_OUTPUT: - i = ctx->shader->noutput++; - assert(i < Elements(ctx->shader->output)); - ctx->shader->output[i].name = d->Semantic.Name; - ctx->shader->output[i].sid = d->Semantic.Index; - ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; - ctx->shader->output[i].interpolate = d->Interp.Interpolate; - ctx->shader->output[i].write_mask = d->Declaration.UsageMask; - if (ctx->type == TGSI_PROCESSOR_VERTEX || - ctx->type == TGSI_PROCESSOR_GEOMETRY) { - ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); - switch (d->Semantic.Name) { - case TGSI_SEMANTIC_CLIPDIST: - ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2); - break; - case TGSI_SEMANTIC_PSIZE: - ctx->shader->vs_out_misc_write = 1; - ctx->shader->vs_out_point_size = 1; - break; - case TGSI_SEMANTIC_EDGEFLAG: - ctx->shader->vs_out_misc_write = 1; - ctx->shader->vs_out_edgeflag = 1; - ctx->edgeflag_output = i; - break; - case TGSI_SEMANTIC_VIEWPORT_INDEX: - ctx->shader->vs_out_misc_write = 1; - ctx->shader->vs_out_viewport = 1; - break; - case TGSI_SEMANTIC_LAYER: - ctx->shader->vs_out_misc_write = 1; - ctx->shader->vs_out_layer = 1; - break; - case TGSI_SEMANTIC_CLIPVERTEX: - ctx->clip_vertex_write = TRUE; - ctx->cv_output = i; - break; - } - if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { - ctx->gs_out_ring_offset += 16; - } - } 
else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - switch (d->Semantic.Name) { - case TGSI_SEMANTIC_COLOR: - ctx->shader->nr_ps_max_color_exports++; - break; + for (j = 0; j < count; j++) { + i = ctx->shader->noutput + j; + assert(i < Elements(ctx->shader->output)); + ctx->shader->output[i].name = d->Semantic.Name; + ctx->shader->output[i].sid = d->Semantic.Index + j; + ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j; + ctx->shader->output[i].interpolate = d->Interp.Interpolate; + ctx->shader->output[i].write_mask = d->Declaration.UsageMask; + if (ctx->type == TGSI_PROCESSOR_VERTEX || + ctx->type == TGSI_PROCESSOR_GEOMETRY) { + ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); + switch (d->Semantic.Name) { + case TGSI_SEMANTIC_CLIPDIST: + ctx->shader->clip_dist_write |= d->Declaration.UsageMask << + ((d->Semantic.Index + j) << 2); + break; + case TGSI_SEMANTIC_PSIZE: + ctx->shader->vs_out_misc_write = 1; + ctx->shader->vs_out_point_size = 1; + break; + case TGSI_SEMANTIC_EDGEFLAG: + ctx->shader->vs_out_misc_write = 1; + ctx->shader->vs_out_edgeflag = 1; + ctx->edgeflag_output = i; + break; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + ctx->shader->vs_out_misc_write = 1; + ctx->shader->vs_out_viewport = 1; + break; + case TGSI_SEMANTIC_LAYER: + ctx->shader->vs_out_misc_write = 1; + ctx->shader->vs_out_layer = 1; + break; + case TGSI_SEMANTIC_CLIPVERTEX: + ctx->clip_vertex_write = TRUE; + ctx->cv_output = i; + break; + } + if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { + ctx->gs_out_ring_offset += 16; + } + } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { + switch (d->Semantic.Name) { + case TGSI_SEMANTIC_COLOR: + ctx->shader->nr_ps_max_color_exports++; + break; + } } } + ctx->shader->noutput += count; break; case TGSI_FILE_TEMPORARY: if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { From 2489054f663baa69e659e0878cb39f4e7197ee0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 
6 Jun 2015 14:12:34 +0200 Subject: [PATCH 634/834] glsl: fix "tesselation" typo Trivial. --- src/glsl/ast_to_hir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index a05efe00470..fc24305b244 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2729,7 +2729,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, * GL_ARB_conservative_depth * GL_ARB_gpu_shader5 * GL_ARB_separate_shader_objects - * GL_ARB_tesselation_shader + * GL_ARB_tessellation_shader * GL_ARB_transform_feedback3 * GL_ARB_uniform_buffer_object * From b0a2280e45e5abc56e5301f84f33226469000d6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 May 2015 20:35:15 +0200 Subject: [PATCH 635/834] gallium/util: add util_last_bit64 This will be needed by radeonsi. Reviewed-by: Ilia Mirkin --- src/gallium/auxiliary/util/u_math.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 58070a9dafa..3b4040f0ee2 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -424,6 +424,25 @@ util_last_bit(unsigned u) #endif } +/** + * Find last bit set in a word. The least significant bit is 1. + * Return 0 if no bits are set. + */ +static INLINE unsigned +util_last_bit64(uint64_t u) +{ +#if defined(HAVE___BUILTIN_CLZLL) + return u == 0 ? 0 : 64 - __builtin_clzll(u); +#else + unsigned r = 0; + while (u) { + r++; + u >>= 1; + } + return r; +#endif +} + /** * Find last bit in a word that does not match the sign bit. The least * significant bit is 1. From 8f37e8e64fc897180603a7247e2fd47bf0ffb834 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 15 Jun 2015 11:57:10 +0800 Subject: [PATCH 636/834] ilo: add 3DSTATE_AA_LINE_PARAMETERS to ilo_state_raster Utilize ilo_state_raster to avoid redundant state change. 
--- .../drivers/ilo/core/ilo_builder_3d_bottom.h | 18 +++++++++++------- .../drivers/ilo/core/ilo_state_raster.c | 3 ++- .../drivers/ilo/core/ilo_state_raster.h | 1 + src/gallium/drivers/ilo/ilo_render_gen6.c | 5 +++-- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 88ed6ea054c..7c1825b6b13 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -646,18 +646,22 @@ gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder, } static inline void -gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder) +gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder, + const struct ilo_state_raster *rs) { const uint8_t cmd_len = 3; - const uint32_t dw[3] = { - GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | (cmd_len - 2), - 0 << GEN6_AA_LINE_DW1_BIAS__SHIFT | 0, - 0 << GEN6_AA_LINE_DW2_CAP_BIAS__SHIFT | 0, - }; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 8); - ilo_builder_batch_write(builder, cmd_len, dw); + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | (cmd_len - 2); + /* constant */ + dw[1] = 0 << GEN6_AA_LINE_DW1_BIAS__SHIFT | + 0 << GEN6_AA_LINE_DW1_SLOPE__SHIFT; + dw[2] = 0 << GEN6_AA_LINE_DW2_CAP_BIAS__SHIFT | + 0 << GEN6_AA_LINE_DW2_CAP_SLOPE__SHIFT; } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_raster.c b/src/gallium/drivers/ilo/core/ilo_state_raster.c index 2b7567e3111..a33812d7638 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_raster.c +++ b/src/gallium/drivers/ilo/core/ilo_state_raster.c @@ -985,7 +985,8 @@ ilo_state_raster_full_delta(const struct ilo_state_raster *rs, ILO_STATE_RASTER_3DSTATE_SF | ILO_STATE_RASTER_3DSTATE_MULTISAMPLE | ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK | - ILO_STATE_RASTER_3DSTATE_WM; + ILO_STATE_RASTER_3DSTATE_WM | + 
ILO_STATE_RASTER_3DSTATE_AA_LINE_PARAMETERS; if (ilo_dev_gen(dev) >= ILO_GEN(8)) { delta->dirty |= ILO_STATE_RASTER_3DSTATE_RASTER | diff --git a/src/gallium/drivers/ilo/core/ilo_state_raster.h b/src/gallium/drivers/ilo/core/ilo_state_raster.h index 0b4665b5de8..add26cd0a48 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_raster.h +++ b/src/gallium/drivers/ilo/core/ilo_state_raster.h @@ -41,6 +41,7 @@ enum ilo_state_raster_dirty_bits { ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK = (1 << 4), ILO_STATE_RASTER_3DSTATE_WM = (1 << 5), ILO_STATE_RASTER_3DSTATE_WM_HZ_OP = (1 << 6), + ILO_STATE_RASTER_3DSTATE_AA_LINE_PARAMETERS = (1 << 7), }; enum ilo_state_raster_earlyz_op { diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 30abead0cdc..22081e44c03 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -774,11 +774,12 @@ gen6_draw_wm_raster(struct ilo_render *r, } /* 3DSTATE_AA_LINE_PARAMETERS */ - if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_smooth) { + if (session->rs_delta.dirty & + ILO_STATE_RASTER_3DSTATE_AA_LINE_PARAMETERS) { if (ilo_dev_gen(r->dev) == ILO_GEN(6)) gen6_wa_pre_non_pipelined(r); - gen6_3DSTATE_AA_LINE_PARAMETERS(r->builder); + gen6_3DSTATE_AA_LINE_PARAMETERS(r->builder, &vec->rasterizer->rs); } } From 7cb853d52ae795b76adec41c98870166b41c9e6f Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 15 Jun 2015 12:01:29 +0800 Subject: [PATCH 637/834] ilo: add ilo_state_sample_pattern Move sample pattern initialization from ilo_render to ilo_state_sample_pattern. 
--- .../drivers/ilo/core/ilo_builder_3d_bottom.h | 37 ++--- .../drivers/ilo/core/ilo_state_raster.c | 143 ++++++++++++++++++ .../drivers/ilo/core/ilo_state_raster.h | 42 +++++ src/gallium/drivers/ilo/ilo_render.c | 126 +-------------- src/gallium/drivers/ilo/ilo_render.h | 3 - src/gallium/drivers/ilo/ilo_render_gen.h | 6 +- src/gallium/drivers/ilo/ilo_render_gen6.c | 14 +- src/gallium/drivers/ilo/ilo_render_gen7.c | 30 +--- src/gallium/drivers/ilo/ilo_render_gen8.c | 10 +- 9 files changed, 225 insertions(+), 186 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 7c1825b6b13..cc1ece3ed14 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -450,9 +450,13 @@ gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(struct ilo_builder *builder, static inline void gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, const struct ilo_state_raster *rs, - const uint32_t *pattern, int pattern_len) + const struct ilo_state_sample_pattern *pattern, + uint8_t sample_count) { const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3; + const uint32_t *packed = (const uint32_t *) + ilo_state_sample_pattern_get_packed_offsets(pattern, + builder->dev, sample_count); uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 7.5); @@ -463,10 +467,10 @@ gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, /* see raster_set_gen8_3DSTATE_MULTISAMPLE() */ dw[1] = rs->sample[0]; - assert(pattern_len == 1 || pattern_len == 2); - dw[2] = pattern[0]; + /* see sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN() */ + dw[2] = (sample_count >= 4) ? packed[0] : 0; if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) - dw[3] = (pattern_len == 2) ? pattern[1] : 0; + dw[3] = (sample_count >= 8) ? 
packed[1] : 0; } static inline void @@ -487,11 +491,7 @@ gen8_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, static inline void gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_builder *builder, - const uint32_t *pattern_1x, - const uint32_t *pattern_2x, - const uint32_t *pattern_4x, - const uint32_t *pattern_8x, - const uint32_t *pattern_16x) + const struct ilo_state_sample_pattern *pattern) { const uint8_t cmd_len = 9; uint32_t *dw; @@ -501,15 +501,16 @@ gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_builder *builder, ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SAMPLE_PATTERN) | (cmd_len - 2); - dw[1] = pattern_16x[3]; - dw[2] = pattern_16x[2]; - dw[3] = pattern_16x[1]; - dw[4] = pattern_16x[0]; - dw[5] = pattern_8x[1]; - dw[6] = pattern_8x[0]; - dw[7] = pattern_4x[0]; - dw[8] = pattern_1x[0] << 16 | - pattern_2x[0]; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw[4] = 0; + /* see sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN() */ + dw[5] = ((const uint32_t *) pattern->pattern_8x)[1]; + dw[6] = ((const uint32_t *) pattern->pattern_8x)[0]; + dw[7] = ((const uint32_t *) pattern->pattern_4x)[0]; + dw[8] = pattern->pattern_1x[0] << 16 | + ((const uint16_t *) pattern->pattern_2x)[0]; } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_raster.c b/src/gallium/drivers/ilo/core/ilo_state_raster.c index a33812d7638..a93eaad154c 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_raster.c +++ b/src/gallium/drivers/ilo/core/ilo_state_raster.c @@ -862,6 +862,69 @@ raster_set_gen8_3dstate_wm_hz_op(struct ilo_state_raster *rs, return true; } +static bool +sample_pattern_get_gen6_packed_offsets(const struct ilo_dev *dev, + uint8_t sample_count, + const struct ilo_state_sample_pattern_offset_info *in, + uint8_t *out) +{ + uint8_t max_dist, i; + + ILO_DEV_ASSERT(dev, 6, 8); + + max_dist = 0; + for (i = 0; i < sample_count; i++) { + const int8_t dist_x = (int8_t) in[i].x - 8; + const int8_t dist_y = (int8_t) in[i].y - 8; + const uint8_t 
dist = dist_x * dist_x + dist_y * dist_y; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 305: + * + * "Programming Note: When programming the sample offsets (for + * NUMSAMPLES_4 or _8 and MSRASTMODE_xxx_PATTERN), the order of the + * samples 0 to 3 (or 7 for 8X) must have monotonically increasing + * distance from the pixel center. This is required to get the + * correct centroid computation in the device." + */ + assert(dist >= max_dist); + max_dist = dist; + + assert(in[i].x < 16); + assert(in[i].y < 16); + + out[i] = in[i].x << 4 | in[i].y; + } + + return true; +} + +static bool +sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_state_sample_pattern *pattern, + const struct ilo_dev *dev, + const struct ilo_state_sample_pattern_info *info) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_1x) >= 1); + STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_2x) >= 2); + STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_4x) >= 4); + STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_8x) >= 8); + STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_16x) >= 16); + + return (sample_pattern_get_gen6_packed_offsets(dev, 1, + info->pattern_1x, pattern->pattern_1x) && + sample_pattern_get_gen6_packed_offsets(dev, 2, + info->pattern_2x, pattern->pattern_2x) && + sample_pattern_get_gen6_packed_offsets(dev, 4, + info->pattern_4x, pattern->pattern_4x) && + sample_pattern_get_gen6_packed_offsets(dev, 8, + info->pattern_8x, pattern->pattern_8x) && + sample_pattern_get_gen6_packed_offsets(dev, 16, + info->pattern_16x, pattern->pattern_16x)); + +} + bool ilo_state_raster_init(struct ilo_state_raster *rs, const struct ilo_dev *dev, @@ -1027,3 +1090,83 @@ ilo_state_raster_get_delta(const struct ilo_state_raster *rs, delta->dirty |= ILO_STATE_RASTER_3DSTATE_WM_HZ_OP; } } + +bool +ilo_state_sample_pattern_init(struct ilo_state_sample_pattern *pattern, + const struct ilo_dev *dev, + const struct ilo_state_sample_pattern_info *info) +{ + bool ret = true; + + ret &= 
sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN(pattern, dev, info); + + assert(ret); + + return ret; +} + +bool +ilo_state_sample_pattern_init_default(struct ilo_state_sample_pattern *pattern, + const struct ilo_dev *dev) +{ + static const struct ilo_state_sample_pattern_info default_info = { + .pattern_1x = { + { 8, 8 }, + }, + + .pattern_2x = { + { 4, 4 }, { 12, 12 }, + }, + + .pattern_4x = { + { 6, 2 }, { 14, 6 }, { 2, 10 }, { 10, 14 }, + }, + + /* \see brw_multisample_positions_8x */ + .pattern_8x = { + { 7, 9 }, { 9, 13 }, { 11, 3 }, { 13, 11 }, + { 1, 7 }, { 5, 1 }, { 15, 5 }, { 3, 15 }, + }, + + .pattern_16x = { + { 8, 10 }, { 11, 8 }, { 5, 6 }, { 6, 4 }, + { 12, 11 }, { 13, 9 }, { 14, 7 }, { 10, 2 }, + { 4, 13 }, { 3, 3 }, { 7, 1 }, { 15, 5 }, + { 1, 12 }, { 9, 0 }, { 2, 14 }, { 0, 15 }, + }, + }; + + return ilo_state_sample_pattern_init(pattern, dev, &default_info); +} + +const uint8_t * +ilo_state_sample_pattern_get_packed_offsets(const struct ilo_state_sample_pattern *pattern, + const struct ilo_dev *dev, + uint8_t sample_count) +{ + switch (sample_count) { + case 1: return pattern->pattern_1x; + case 2: return pattern->pattern_2x; + case 4: return pattern->pattern_4x; + case 8: return pattern->pattern_8x; + case 16: return pattern->pattern_16x; + default: + assert(!"unknown sample count"); + return NULL; + } +} + +void +ilo_state_sample_pattern_get_offset(const struct ilo_state_sample_pattern *pattern, + const struct ilo_dev *dev, + uint8_t sample_count, uint8_t sample_index, + uint8_t *x, uint8_t *y) +{ + const const uint8_t *packed = + ilo_state_sample_pattern_get_packed_offsets(pattern, dev, sample_count); + + assert(sample_index < sample_count); + + *x = (packed[sample_index] >> 4) & 0xf; + *y = packed[sample_index] & 0xf; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_raster.h b/src/gallium/drivers/ilo/core/ilo_state_raster.h index add26cd0a48..e4697bc383f 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_raster.h +++ 
b/src/gallium/drivers/ilo/core/ilo_state_raster.h @@ -197,6 +197,28 @@ struct ilo_state_raster_delta { uint32_t dirty; }; +struct ilo_state_sample_pattern_offset_info { + /* in U0.4 */ + uint8_t x; + uint8_t y; +}; + +struct ilo_state_sample_pattern_info { + struct ilo_state_sample_pattern_offset_info pattern_1x[1]; + struct ilo_state_sample_pattern_offset_info pattern_2x[2]; + struct ilo_state_sample_pattern_offset_info pattern_4x[4]; + struct ilo_state_sample_pattern_offset_info pattern_8x[8]; + struct ilo_state_sample_pattern_offset_info pattern_16x[16]; +}; + +struct ilo_state_sample_pattern { + uint8_t pattern_1x[1]; + uint8_t pattern_2x[2]; + uint8_t pattern_4x[4]; + uint8_t pattern_8x[8]; + uint8_t pattern_16x[16]; +}; + bool ilo_state_raster_init(struct ilo_state_raster *rs, const struct ilo_dev *dev, @@ -230,4 +252,24 @@ ilo_state_raster_get_delta(const struct ilo_state_raster *rs, const struct ilo_state_raster *old, struct ilo_state_raster_delta *delta); +bool +ilo_state_sample_pattern_init(struct ilo_state_sample_pattern *pattern, + const struct ilo_dev *dev, + const struct ilo_state_sample_pattern_info *info); + +bool +ilo_state_sample_pattern_init_default(struct ilo_state_sample_pattern *pattern, + const struct ilo_dev *dev); + +const uint8_t * +ilo_state_sample_pattern_get_packed_offsets(const struct ilo_state_sample_pattern *pattern, + const struct ilo_dev *dev, + uint8_t sample_count); + +void +ilo_state_sample_pattern_get_offset(const struct ilo_state_sample_pattern *pattern, + const struct ilo_dev *dev, + uint8_t sample_count, uint8_t sample_index, + uint8_t *x, uint8_t *y); + #endif /* ILO_STATE_RASTER_H */ diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c index c3b53724a28..910ed8c9608 100644 --- a/src/gallium/drivers/ilo/ilo_render.c +++ b/src/gallium/drivers/ilo/ilo_render.c @@ -35,76 +35,10 @@ #include "ilo_query.h" #include "ilo_render_gen.h" -/* in S1.3 */ -struct sample_position { - int8_t x, y; -}; - 
-static const struct sample_position ilo_sample_pattern_1x[1] = { - { 0, 0 }, -}; - -static const struct sample_position ilo_sample_pattern_2x[2] = { - { -4, -4 }, - { 4, 4 }, -}; - -static const struct sample_position ilo_sample_pattern_4x[4] = { - { -2, -6 }, - { 6, -2 }, - { -6, 2 }, - { 2, 6 }, -}; - -/* \see brw_multisample_positions_8x */ -static const struct sample_position ilo_sample_pattern_8x[8] = { - { -1, 1 }, - { 1, 5 }, - { 3, -5 }, - { 5, 3 }, - { -7, -1 }, - { -3, -7 }, - { 7, -3 }, - { -5, 7 }, -}; - -static const struct sample_position ilo_sample_pattern_16x[16] = { - { 0, 2 }, - { 3, 0 }, - { -3, -2 }, - { -2, -4 }, - { 4, 3 }, - { 5, 1 }, - { 6, -1 }, - { 2, -6 }, - { -4, 5 }, - { -5, -5 }, - { -1, -7 }, - { 7, -3 }, - { -7, 4 }, - { 1, -8 }, - { -6, 6 }, - { -8, 7 }, -}; - -static uint8_t -pack_sample_position(const struct sample_position *pos) -{ - return (pos->x + 8) << 4 | (pos->y + 8); -} - -static void -get_sample_position(const struct sample_position *pos, float *x, float *y) -{ - *x = (float) (pos->x + 8) / 16.0f; - *y = (float) (pos->y + 8) / 16.0f; -} - struct ilo_render * ilo_render_create(struct ilo_builder *builder) { struct ilo_render *render; - int i; render = CALLOC_STRUCT(ilo_render); if (!render) @@ -121,29 +55,8 @@ ilo_render_create(struct ilo_builder *builder) return NULL; } - /* pack into dwords */ - render->sample_pattern_1x = pack_sample_position(ilo_sample_pattern_1x); - render->sample_pattern_2x = - pack_sample_position(&ilo_sample_pattern_2x[1]) << 8 | - pack_sample_position(&ilo_sample_pattern_2x[0]); - for (i = 0; i < 4; i++) { - render->sample_pattern_4x |= - pack_sample_position(&ilo_sample_pattern_4x[i]) << (8 * i); - - render->sample_pattern_8x[0] |= - pack_sample_position(&ilo_sample_pattern_8x[i]) << (8 * i); - render->sample_pattern_8x[1] |= - pack_sample_position(&ilo_sample_pattern_8x[i + 4]) << (8 * i); - - render->sample_pattern_16x[0] |= - pack_sample_position(&ilo_sample_pattern_16x[i]) << (8 * i); - 
render->sample_pattern_16x[1] |= - pack_sample_position(&ilo_sample_pattern_16x[i + 4]) << (8 * i); - render->sample_pattern_16x[2] |= - pack_sample_position(&ilo_sample_pattern_16x[i + 8]) << (8 * i); - render->sample_pattern_16x[3] |= - pack_sample_position(&ilo_sample_pattern_16x[i + 12]) << (8 * i); - } + ilo_state_sample_pattern_init_default(&render->sample_pattern, + render->dev); ilo_render_invalidate_hw(render); ilo_render_invalidate_builder(render); @@ -164,38 +77,13 @@ ilo_render_get_sample_position(const struct ilo_render *render, unsigned sample_index, float *x, float *y) { - const struct sample_position *pattern; + uint8_t off_x, off_y; - switch (sample_count) { - case 1: - assert(sample_index < Elements(ilo_sample_pattern_1x)); - pattern = ilo_sample_pattern_1x; - break; - case 2: - assert(sample_index < Elements(ilo_sample_pattern_2x)); - pattern = ilo_sample_pattern_2x; - break; - case 4: - assert(sample_index < Elements(ilo_sample_pattern_4x)); - pattern = ilo_sample_pattern_4x; - break; - case 8: - assert(sample_index < Elements(ilo_sample_pattern_8x)); - pattern = ilo_sample_pattern_8x; - break; - case 16: - assert(sample_index < Elements(ilo_sample_pattern_16x)); - pattern = ilo_sample_pattern_16x; - break; - default: - assert(!"unknown sample count"); - *x = 0.5f; - *y = 0.5f; - return; - break; - } + ilo_state_sample_pattern_get_offset(&render->sample_pattern, render->dev, + sample_count, sample_index, &off_x, &off_y); - get_sample_position(&pattern[sample_index], x, y); + *x = (float) off_x / 16.0f; + *y = (float) off_y / 16.0f; } void diff --git a/src/gallium/drivers/ilo/ilo_render.h b/src/gallium/drivers/ilo/ilo_render.h index a85b2800fb1..098af73ec9b 100644 --- a/src/gallium/drivers/ilo/ilo_render.h +++ b/src/gallium/drivers/ilo/ilo_render.h @@ -43,9 +43,6 @@ ilo_render_create(struct ilo_builder *builder); void ilo_render_destroy(struct ilo_render *render); -/** - * Estimate the size of an action. 
- */ void ilo_render_get_sample_position(const struct ilo_render *render, unsigned sample_count, diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index 439d4326852..ae14e779e1f 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -51,11 +51,7 @@ struct ilo_render { struct intel_bo *workaround_bo; - uint32_t sample_pattern_1x; - uint32_t sample_pattern_2x; - uint32_t sample_pattern_4x; - uint32_t sample_pattern_8x[2]; - uint32_t sample_pattern_16x[4]; + struct ilo_state_sample_pattern sample_pattern; bool hw_ctx_changed; diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 22081e44c03..73c26e9093d 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -688,18 +688,15 @@ gen6_draw_wm_multisample(struct ilo_render *r, /* 3DSTATE_MULTISAMPLE */ if (DIRTY(FB) || (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_MULTISAMPLE)) { - const uint32_t *pattern; - - pattern = (vec->fb.num_samples > 1) ? - &r->sample_pattern_4x : &r->sample_pattern_1x; + const uint8_t sample_count = (vec->fb.num_samples > 1) ? 4 : 1; if (ilo_dev_gen(r->dev) == ILO_GEN(6)) { gen6_wa_pre_non_pipelined(r); gen6_wa_pre_3dstate_multisample(r); } - gen6_3DSTATE_MULTISAMPLE(r->builder, - &vec->rasterizer->rs, pattern, 1); + gen6_3DSTATE_MULTISAMPLE(r->builder, &vec->rasterizer->rs, + &r->sample_pattern, sample_count); } /* 3DSTATE_SAMPLE_MASK */ @@ -875,12 +872,11 @@ static void gen6_rectlist_wm_multisample(struct ilo_render *r, const struct ilo_blitter *blitter) { - const uint32_t *pattern = (blitter->fb.num_samples > 1) ? - &r->sample_pattern_4x : &r->sample_pattern_1x; + const uint8_t sample_count = (blitter->fb.num_samples > 1) ? 
4 : 1; gen6_wa_pre_3dstate_multisample(r); - gen6_3DSTATE_MULTISAMPLE(r->builder, &blitter->fb.rs, pattern, true); + gen6_3DSTATE_MULTISAMPLE(r->builder, &blitter->fb.rs, &r->sample_pattern, sample_count); gen6_3DSTATE_SAMPLE_MASK(r->builder, &blitter->fb.rs); } diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 0b2245c80da..e4d2bf064da 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -598,22 +598,13 @@ gen7_draw_wm_multisample(struct ilo_render *r, /* 3DSTATE_MULTISAMPLE */ if (DIRTY(FB) || (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_MULTISAMPLE)) { - const uint32_t *pattern; - int pattern_len; + const uint8_t sample_count = (vec->fb.num_samples > 4) ? 8 : + (vec->fb.num_samples > 1) ? 4 : 1; gen7_wa_pre_3dstate_multisample(r); - if (vec->fb.num_samples > 4) { - pattern = r->sample_pattern_8x; - pattern_len = ARRAY_SIZE(r->sample_pattern_8x); - } else { - pattern = (vec->fb.num_samples > 1) ? - &r->sample_pattern_4x : &r->sample_pattern_1x; - pattern_len = 1; - } - gen6_3DSTATE_MULTISAMPLE(r->builder, &vec->rasterizer->rs, - pattern, pattern_len); + &r->sample_pattern, sample_count); } /* 3DSTATE_SAMPLE_MASK */ @@ -748,22 +739,13 @@ static void gen7_rectlist_wm_multisample(struct ilo_render *r, const struct ilo_blitter *blitter) { - const uint32_t *pattern; - int pattern_len; - - if (blitter->fb.num_samples > 4) { - pattern = r->sample_pattern_8x; - pattern_len = ARRAY_SIZE(r->sample_pattern_8x); - } else { - pattern = (blitter->fb.num_samples > 1) ? - &r->sample_pattern_4x : &r->sample_pattern_1x; - pattern_len = 1; - } + const uint8_t sample_count = (blitter->fb.num_samples > 4) ? 8 : + (blitter->fb.num_samples > 1) ? 
4 : 1; gen7_wa_pre_3dstate_multisample(r); gen6_3DSTATE_MULTISAMPLE(r->builder, &blitter->fb.rs, - pattern, pattern_len); + &r->sample_pattern, sample_count); gen6_3DSTATE_SAMPLE_MASK(r->builder, &blitter->fb.rs); } diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 2ce71fb161e..495dbc3a283 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -176,14 +176,8 @@ gen8_draw_wm_sample_pattern(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_SAMPLE_PATTERN */ - if (r->hw_ctx_changed) { - gen8_3DSTATE_SAMPLE_PATTERN(r->builder, - &r->sample_pattern_1x, - &r->sample_pattern_2x, - &r->sample_pattern_4x, - r->sample_pattern_8x, - r->sample_pattern_16x); - } + if (r->hw_ctx_changed) + gen8_3DSTATE_SAMPLE_PATTERN(r->builder, &r->sample_pattern); } static void From 94ab56367169ba2902e83aded409db2df3d25eb1 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 15 Jun 2015 11:24:47 +0800 Subject: [PATCH 638/834] ilo: add ilo_state_raster_{line,poly}_stipple Initialize hardware stipple states on bind instead of on emission. 
--- .../drivers/ilo/core/ilo_builder_3d_bottom.h | 44 +++------- .../drivers/ilo/core/ilo_state_raster.c | 80 +++++++++++++++++++ .../drivers/ilo/core/ilo_state_raster.h | 26 ++++++ src/gallium/drivers/ilo/ilo_render_gen6.c | 10 +-- src/gallium/drivers/ilo/ilo_state.c | 18 ++++- src/gallium/drivers/ilo/ilo_state.h | 3 +- 6 files changed, 138 insertions(+), 43 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index cc1ece3ed14..f7f95f493b7 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -33,7 +33,6 @@ #include "ilo_core.h" #include "ilo_dev.h" -#include "ilo_format.h" #include "ilo_state_cc.h" #include "ilo_state_raster.h" #include "ilo_state_sbe.h" @@ -576,74 +575,51 @@ gen6_3DSTATE_DRAWING_RECTANGLE(struct ilo_builder *builder, static inline void gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder, - int x_offset, int y_offset) + const struct ilo_state_poly_stipple *stipple) { const uint8_t cmd_len = 2; uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 8); - assert(x_offset >= 0 && x_offset <= 31); - assert(y_offset >= 0 && y_offset <= 31); - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) | (cmd_len - 2); - dw[1] = x_offset << 8 | y_offset; + /* constant */ + dw[1] = 0; } static inline void gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder, - const struct pipe_poly_stipple *pattern) + const struct ilo_state_poly_stipple *stipple) { const uint8_t cmd_len = 33; uint32_t *dw; - int i; ILO_DEV_ASSERT(builder->dev, 6, 8); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) | (cmd_len - 2); - dw++; - - STATIC_ASSERT(Elements(pattern->stipple) == 32); - for (i = 0; i < 32; i++) - dw[i] = pattern->stipple[i]; + /* see poly_stipple_set_gen6_3DSTATE_POLY_STIPPLE_PATTERN() */ + 
memcpy(&dw[1], stipple->stipple, sizeof(stipple->stipple)); } static inline void gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder, - unsigned pattern, unsigned factor) + const struct ilo_state_line_stipple *stipple) { const uint8_t cmd_len = 3; - unsigned inverse; uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 8); - assert((pattern & 0xffff) == pattern); - assert(factor >= 1 && factor <= 256); - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) | (cmd_len - 2); - dw[1] = pattern; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { - /* in U1.16 */ - inverse = 65536 / factor; - - dw[2] = inverse << GEN7_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT | - factor; - } - else { - /* in U1.13 */ - inverse = 8192 / factor; - - dw[2] = inverse << GEN6_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT | - factor; - } + /* see line_stipple_set_gen6_3DSTATE_LINE_STIPPLE() */ + dw[1] = stipple->stipple[0]; + dw[2] = stipple->stipple[1]; } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_raster.c b/src/gallium/drivers/ilo/core/ilo_state_raster.c index a93eaad154c..ed64a1f0d3c 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_raster.c +++ b/src/gallium/drivers/ilo/core/ilo_state_raster.c @@ -899,6 +899,37 @@ sample_pattern_get_gen6_packed_offsets(const struct ilo_dev *dev, return true; } +static bool +line_stipple_set_gen6_3DSTATE_LINE_STIPPLE(struct ilo_state_line_stipple *stipple, + const struct ilo_dev *dev, + const struct ilo_state_line_stipple_info *info) +{ + uint32_t dw1, dw2; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(info->repeat_count >= 1 && info->repeat_count <= 256); + + dw1 = info->pattern; + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + /* in U1.16 */ + const uint32_t inverse = 65536 / info->repeat_count; + dw2 = inverse << GEN7_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT | + info->repeat_count << GEN6_LINE_STIPPLE_DW2_REPEAT_COUNT__SHIFT; + } else { + /* in U1.13 */ + const uint16_t inverse = 8192 / 
info->repeat_count; + dw2 = inverse << GEN6_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT | + info->repeat_count << GEN6_LINE_STIPPLE_DW2_REPEAT_COUNT__SHIFT; + } + + STATIC_ASSERT(ARRAY_SIZE(stipple->stipple) >= 2); + stipple->stipple[0] = dw1; + stipple->stipple[1] = dw2; + + return true; +} + static bool sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_state_sample_pattern *pattern, const struct ilo_dev *dev, @@ -925,6 +956,19 @@ sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_state_sample_pattern * } +static bool +poly_stipple_set_gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_state_poly_stipple *stipple, + const struct ilo_dev *dev, + const struct ilo_state_poly_stipple_info *info) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + STATIC_ASSERT(ARRAY_SIZE(stipple->stipple) >= 32); + memcpy(stipple->stipple, info->pattern, sizeof(info->pattern)); + + return true; +} + bool ilo_state_raster_init(struct ilo_state_raster *rs, const struct ilo_dev *dev, @@ -1170,3 +1214,39 @@ ilo_state_sample_pattern_get_offset(const struct ilo_state_sample_pattern *patte *x = (packed[sample_index] >> 4) & 0xf; *y = packed[sample_index] & 0xf; } + +/** + * No need to initialize first. + */ +bool +ilo_state_line_stipple_set_info(struct ilo_state_line_stipple *stipple, + const struct ilo_dev *dev, + const struct ilo_state_line_stipple_info *info) +{ + bool ret = true; + + ret &= line_stipple_set_gen6_3DSTATE_LINE_STIPPLE(stipple, + dev, info); + + assert(ret); + + return ret; +} + +/** + * No need to initialize first. 
+ */ +bool +ilo_state_poly_stipple_set_info(struct ilo_state_poly_stipple *stipple, + const struct ilo_dev *dev, + const struct ilo_state_poly_stipple_info *info) +{ + bool ret = true; + + ret &= poly_stipple_set_gen6_3DSTATE_POLY_STIPPLE_PATTERN(stipple, + dev, info); + + assert(ret); + + return ret; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_raster.h b/src/gallium/drivers/ilo/core/ilo_state_raster.h index e4697bc383f..fc90b49cfc3 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_raster.h +++ b/src/gallium/drivers/ilo/core/ilo_state_raster.h @@ -219,6 +219,23 @@ struct ilo_state_sample_pattern { uint8_t pattern_16x[16]; }; +struct ilo_state_line_stipple_info { + uint16_t pattern; + uint16_t repeat_count; +}; + +struct ilo_state_line_stipple { + uint32_t stipple[2]; +}; + +struct ilo_state_poly_stipple_info { + uint32_t pattern[32]; +}; + +struct ilo_state_poly_stipple { + uint32_t stipple[32]; +}; + bool ilo_state_raster_init(struct ilo_state_raster *rs, const struct ilo_dev *dev, @@ -271,5 +288,14 @@ ilo_state_sample_pattern_get_offset(const struct ilo_state_sample_pattern *patte const struct ilo_dev *dev, uint8_t sample_count, uint8_t sample_index, uint8_t *x, uint8_t *y); +bool +ilo_state_line_stipple_set_info(struct ilo_state_line_stipple *stipple, + const struct ilo_dev *dev, + const struct ilo_state_line_stipple_info *info); + +bool +ilo_state_poly_stipple_set_info(struct ilo_state_poly_stipple *stipple, + const struct ilo_dev *dev, + const struct ilo_state_poly_stipple_info *info); #endif /* ILO_STATE_RASTER_H */ diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 73c26e9093d..1414f12b439 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -754,10 +754,8 @@ gen6_draw_wm_raster(struct ilo_render *r, if (ilo_dev_gen(r->dev) == ILO_GEN(6)) gen6_wa_pre_non_pipelined(r); - gen6_3DSTATE_POLY_STIPPLE_PATTERN(r->builder, - &vec->poly_stipple); - 
- gen6_3DSTATE_POLY_STIPPLE_OFFSET(r->builder, 0, 0); + gen6_3DSTATE_POLY_STIPPLE_PATTERN(r->builder, &vec->poly_stipple); + gen6_3DSTATE_POLY_STIPPLE_OFFSET(r->builder, &vec->poly_stipple); } /* 3DSTATE_LINE_STIPPLE */ @@ -765,9 +763,7 @@ gen6_draw_wm_raster(struct ilo_render *r, if (ilo_dev_gen(r->dev) == ILO_GEN(6)) gen6_wa_pre_non_pipelined(r); - gen6_3DSTATE_LINE_STIPPLE(r->builder, - vec->rasterizer->state.line_stipple_pattern, - vec->rasterizer->state.line_stipple_factor + 1); + gen6_3DSTATE_LINE_STIPPLE(r->builder, &vec->line_stipple); } /* 3DSTATE_AA_LINE_PARAMETERS */ diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 0145fcbb8d5..84fd7991c26 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -1088,10 +1088,20 @@ ilo_create_rasterizer_state(struct pipe_context *pipe, static void ilo_bind_rasterizer_state(struct pipe_context *pipe, void *state) { + const struct ilo_dev *dev = ilo_context(pipe)->dev; struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector; vec->rasterizer = state; + if (vec->rasterizer) { + struct ilo_state_line_stipple_info info; + + info.pattern = vec->rasterizer->state.line_stipple_pattern; + info.repeat_count = vec->rasterizer->state.line_stipple_factor + 1; + + ilo_state_line_stipple_set_info(&vec->line_stipple, dev, &info); + } + vec->dirty |= ILO_DIRTY_RASTERIZER; } @@ -1610,9 +1620,15 @@ static void ilo_set_polygon_stipple(struct pipe_context *pipe, const struct pipe_poly_stipple *state) { + const struct ilo_dev *dev = ilo_context(pipe)->dev; struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector; + struct ilo_state_poly_stipple_info info; + int i; - vec->poly_stipple = *state; + for (i = 0; i < 32; i++) + info.pattern[i] = state->stipple[i]; + + ilo_state_poly_stipple_set_info(&vec->poly_stipple, dev, &info); vec->dirty |= ILO_DIRTY_POLY_STIPPLE; } diff --git a/src/gallium/drivers/ilo/ilo_state.h 
b/src/gallium/drivers/ilo/ilo_state.h index 90514d52224..91c2a8d01dc 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -354,7 +354,8 @@ struct ilo_state_vector { struct ilo_rasterizer_state *rasterizer; - struct pipe_poly_stipple poly_stipple; + struct ilo_state_line_stipple line_stipple; + struct ilo_state_poly_stipple poly_stipple; unsigned sample_mask; struct ilo_shader_state *fs; From 311abe7fbd590505fd86e22a3030e00445218cb0 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sun, 14 Jun 2015 16:26:40 +0100 Subject: [PATCH 639/834] docs: Update 10.6.0 release notes Signed-off-by: Emil Velikov (cherry picked from commit 3b9cde5c8138fb5cc45c652f2a5c15c5fa222bd7) --- docs/relnotes/10.6.0.html | 243 +++++++++++++++++++++++++++++++++++++- 1 file changed, 241 insertions(+), 2 deletions(-) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 474a2c71fd0..f0f3afddfba 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -14,7 +14,7 @@
      -

      Mesa 10.6.0 Release Notes / TBD

      +

      Mesa 10.6.0 Release Notes / June 14, 2015

      Mesa 10.6.0 is a new development release. @@ -71,7 +71,246 @@ Note: some of the new features are only available with certain drivers.

      Bug fixes

      -TBD. +

      This list is likely incomplete.

      + +
        + +
      • Bug 15006 - translate & rotate the line cause Aliasing
      • + +
      • Bug 27007 - Lines disappear with GL_LINE_SMOOTH
      • + +
      • Bug 28832 - piglit/general/line-aa-width fail
      • + +
      • Bug 45348 - [swrast] piglit fbo-drawbuffers-arbfp regression
      • + +
      • Bug 60797 - 1px lines in octave plot aliased to 0
      • + +
      • Bug 67564 - HiZ buffers are much larger than necessary
      • + +
      • Bug 69226 - Cannot enable basic shaders with Second Life aborts attempt
      • + +
      • Bug 71591 - Second Life shaders fail to compile (extension declared in middle of shader)
      • + +
      • Bug 79202 - valgrind errors in glsl-fs-uniform-array-loop-unroll.shader_test; random code generation
      • + +
      • Bug 81025 - [IVB/BYT Bisected]Piglit spec_ARB_draw_indirect_arb_draw_indirect-draw-elements-prim-restart-ugly fails
      • + +
      • Bug 82477 - [softpipe] piglit fp-long-alu regression
      • + +
      • Bug 82668 - Can't set int attributes to certain values on 32-bit
      • + +
      • Bug 82831 - i965: Support GL_ARB_blend_func_extended in SIMD16
      • + +
      • Bug 83962 - [HSW/BYT]Piglit spec_ARB_gpu_shader5_arb_gpu_shader5-emitstreamvertex_nodraw fails
      • + +
      • Bug 84613 - [G965, bisected] piglit regressions : glslparsertest.glsl2
      • + +
      • Bug 86747 - Noise in Football Manager 2014 textures
      • + +
      • Bug 86792 - [NVC0] Portal 2 Crashes in Wine
      • + +
      • Bug 86811 - [BDW/BSW Bisected]Piglit spec_arb_shading_language_packing_execution_built-in-functions_vs-unpackSnorm4x8 fails
      • + +
      • Bug 86837 - kodi segfault since auxiliary/vl: rework the build of the VL code
      • + +
      • Bug 86944 - glsl_parser_extras.cpp", line 1455: Error: Badly formed expression. (Oracle Studio)
      • + +
      • Bug 86974 - INTEL_DEBUG=shader_time always asserts in fs_generator::generate_code() when Mesa is built with --enable-debug (= with asserts)
      • + +
      • Bug 86980 - [swrast] piglit fp-rfl regression
      • + +
      • Bug 87258 - [BDW/BSW Bisected]Piglit spec_ARB_shader_atomic_counters_array-indexing fails
      • + +
      • Bug 88246 - Commit 2881b12 causes 43 DrawElements test regressions
      • + +
      • Bug 88248 - Calling glClear while there is an occlusion query in progress messes up the results
      • + +
      • Bug 88521 - GLBenchmark 2.7 TRex renders with artifacts on Gen8 with !UXA
      • + +
      • Bug 88534 - include/c11/threads_posix.h PTHREAD_MUTEX_RECURSIVE_NP not defined
      • + +
      • Bug 88561 - [radeonsi][regression,bisected] Depth test/buffer issues in Portal
      • + +
      • Bug 88793 - [BDW/BSW Bisected]Piglit/shaders_glsl-max-varyings fails
      • + +
      • Bug 88815 - Incorrect handling of GLSL #line directive
      • + +
      • Bug 88883 - ir-a2xx.c: variable changed in assert statement
      • + +
      • Bug 88885 - Transform feedback uses incorrect interleaving if a previous draw did not write gl_Position
      • + +
      • Bug 88905 - [SNB+ Bisected]Ogles3conform ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels fails
      • + +
      • Bug 88999 - [SKL] Compiz crashes after opening unity dash
      • + +
      • Bug 89014 - PIPE_QUERY_GPU_FINISHED is not acting as expected on SI
      • + +
      • Bug 89026 - Renderbuffer layered state used for framebuffer completeness test
      • + +
      • Bug 89032 - [BDW/BSW/SKL Bisected]Piglit spec_OpenGL_1.1_infinite-spot-light fails
      • + +
      • Bug 89037 - [SKL]Piglit spec_EXT_texture_array_copyteximage_1D_ARRAY_samples=2 sporadically causes GPU hang
      • + +
      • Bug 89039 - [SKL]etqw system hang
      • + +
      • Bug 89058 - [SKL]Render error in some games (etqw-demo, nexuiz, portal)
      • + +
      • Bug 89068 - glTexImage2D regression by texstore_rgba switch to _mesa_format_convert
      • + +
      • Bug 89069 - Lack of grass in The Talos Principle on radeonsi (native\wine\nine)
      • + +
      • Bug 89094 - [SNB/IVB/HSW/BYT Bisected]Ogles3conform ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails
      • + +
      • Bug 89095 - [SNB/IVB/BYT Bisected]Webglc conformance/glsl/functions/glsl-function-mix-float.html fails
      • + +
      • Bug 89112 - u_atomic_test: u_atomic_test.c:124: test_atomic_8bits_bool: Assertion `r == 65 && "p_atomic_add"' failed.
      • + +
      • Bug 89118 - [SKL Bisected]many Ogles3conform cases core dumped
      • + +
      • Bug 89131 - [Bisected] Graphical corruption in Weston, shows old framebuffer pieces
      • + +
      • Bug 89156 - r300g: GL_COMPRESSED_RED_RGTC1 / ATI1N support broken
      • + +
      • Bug 89180 - [IVB regression] Rendering issues in Mass Effect through VMware Workstation
      • + +
      • Bug 89210 - GS statistics fail on SNB
      • + +
      • Bug 89218 - lower_instructions.cpp:648:48: error: invalid suffix 'd' on floating constant
      • + +
      • Bug 89224 - Incorrect rendering of Unigine Valley running in VM on VMware Workstation
      • + +
      • Bug 89260 - macros.h:34:25: fatal error: util/u_math.h: No such file or directory
      • + +
      • Bug 89292 - [regression,bisected] incomplete screenshots in some cases
      • + +
      • Bug 89311 - [regression, bisected] dEQP: Added entry points for glCompressedTextureSubImage*D.
      • + +
      • Bug 89312 - [regression, bisected] main: Added entry points for CopyTextureSubImage*D. (d6b7c40cecfe01)
      • + +
      • Bug 89315 - [HSW, regression, bisected] i965/fs: Emit MAD instructions when possible.
      • + +
      • Bug 89317 - [HSW, regression, bisected] i965: Add LINTERP/CINTERP to can_do_cmod() (d91390634)
      • + +
      • Bug 89328 - python required to build Mesa release tarballs
      • + +
      • Bug 89342 - main/light.c:159:62: error: 'M_PI' undeclared (first use in this function)
      • + +
      • Bug 89343 - compiler/tests/radeon_compiler_optimize_tests.c:43:3: error: implicit declaration of function ‘fprintf’ [-Werror=implicit-function-declaration]
      • + +
      • Bug 89345 - imports.h:452:58: error: expected declaration specifiers or '...' before 'va_list'
      • + +
      • Bug 89364 - c99_alloca.h:40:22: fatal error: alloca.h: No such file or directory
      • + +
      • Bug 89372 - [softpipe] piglit glsl-1.50 generate-zero-primitives regression
      • + +
      • Bug 89387 - Double delete in lp_bld_misc.cpp
      • + +
      • Bug 89416 - UE4Editor crash after load project
      • + +
      • Bug 89430 - [g965][bisected] arb_copy_image-targets gl_texture* tests fail
      • + +
      • Bug 89433 - GCC 4.2 does not support -Wvla
      • + +
      • Bug 89455 - [NVC0/Gallium] Unigine Heaven black and white boxes
      • + +
      • Bug 89457 - [BSW Bisected]ogles3conform ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails
      • + +
      • Bug 89477 - include/no_extern_c.h:47:1: error: template with C linkage
      • + +
      • Bug 89508 - Bad int(floatBitsToInt(vec4))
      • + +
      • Bug 89530 - FTBFS in loader: missing fstat
      • + +
      • Bug 89569 - Papo & Yo crash on startup [HSW]
      • + +
      • Bug 89590 - Crash in glLinkProgram with shaders with multiple constant arrays
      • + +
      • Bug 89662 - context.c:943: undefined reference to `_glapi_new_nop_table'
      • + +
      • Bug 89670 - cmod_propagation_test.andnz_one regression
      • + +
      • Bug 89679 - [NV50] Portal/Half-Life 2 will not start (native Steam)
      • + +
      • Bug 89689 - [Regression] Weston on DRM backend won't start with new version of mesa
      • + +
      • Bug 89722 - [ILK Bisected]Ogles2conform/ES2-CTS.gtf.GL.equal.equal_vec2_frag fails
      • + +
      • Bug 89726 - [Bisected] dEQP-GLES3: uniform linking logic in the presence of structs
      • + +
      • Bug 89746 - Mesa and LLVM 3.6+ break opengl for genymotion
      • + +
      • Bug 89754 - vertexAttrib fails WebGL Conformance test with mesa drivers
      • + +
      • Bug 89758 - pow WebGL Conformance test with mesa drivers
      • + +
      • Bug 89759 - WebGL OGL ES GLSL conformance test with mesa drivers fails
      • + +
      • Bug 89831 - [r600] r600_asm.c:310:assign_alu_units: Assertion `0' failed.
      • + +
      • Bug 89899 - nir/nir_lower_tex_projector.c:112: error: unknown field ‘ssa’ specified in initializer
      • + +
      • Bug 89957 - vm protection faults in piglit test: texsubimage cube_map_array pbo
      • + +
      • Bug 89960 - [softpipe] piglit copy-pixels regression
      • + +
      • Bug 89961 - [BDW/BSW Bisected]Synmark2_v6 OglDrvRes/OglDrvShComp/OglDrvState/OglPSPom Image Validation fail
      • + +
      • Bug 89963 - lp_bld_debug.cpp:100:31: error: no matching function for call to ‘llvm::raw_ostream::raw_ostream()’
      • + +
      • Bug 90000 - [i965 Bisected NIR] Piglit/gglean_fragprog1-z-write_test fail
      • + +
      • Bug 90109 - [SNB+ Bisected]Ogles3conform ES3-CTS.shaders.uniform_block.random.basic_arrays.3 fails
      • + +
      • Bug 90114 - [SNB+ Bisected]Ogles3conform ES3-CTS.shaders.struct.uniform.sampler_array_fragment fails
      • + +
      • Bug 90130 - gl_PrimitiveId seems to reset at 340
      • + +
      • Bug 90147 - swrast: build error undeclared _SC_PHYS_PAGES on osx
      • + +
      • Bug 90149 - [SNB+ Bisected]ES3-CTS.gtf.GL3Tests.uniform_buffer_object.uniform_buffer_object_getactiveuniformsiv_for_nonexistent_uniform_indices fails
      • + +
      • Bug 90153 - [SKL Bisected]ES3-CTS.gtf.GL3Tests.uniform_buffer_object.uniform_buffer_object_all_valid_basic_types fails
      • + +
      • Bug 90167 - [softpipe] piglit depthstencil-default_fb-drawpixels-32f_24_8_rev regression
      • + +
      • Bug 90207 - [r600g, bisected] regression: NI/Turks crash on WebGL Water (most WebGL stuff)
      • + +
      • Bug 90213 - glDrawPixels with GL_COLOR_INDEX never returns.
      • + +
      • Bug 90243 - [bisected] regression: spec.!opengl 3_2.get-active-attrib-returns-all-inputs
      • + +
      • Bug 90258 - [IVB] spec.glsl-1_10.execution.fs-dfdy-accuracy fails intermittently
      • + +
      • Bug 90310 - Fails to build gallium_dri.so at linking stage with clang because of multiple redefinitions
      • + +
      • Bug 90350 - [G96] Portal's portal are incorrectly rendered
      • + +
      • Bug 90363 - [nv50] HW state is not reset correctly when using a new GL context
      • + +
      • Bug 90397 - ARB_program_interface_query: glGetProgramResourceiv() returns wrong value for GL_REFERENCED_BY_*_SHADER prop for GL_UNIFORM for members of an interface block with an instance name
      • + +
      • Bug 90466 - arm: linker error undefined reference to `nir_metadata_preserve'
      • + +
      • Bug 90520 - Register spilling clobbers registers used elsewhere in the shader
      • + +
      • Bug 90547 - [BDW/BSW/SKL Bisected]Piglit/glean@vertprog1-rsq_test_2_(reciprocal_square_root_of_negative_value) fails
      • + +
      • Bug 90580 - [HSW bisected] integer multiplication bug
      • + +
      • Bug 90629 - [i965] SIMD16 dual_source_blend assertion `src[i].file != GRF || src[i].width == dst.width' failed
      • + +
      • Bug 90749 - [BDW Bisected]dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide fails
      • + +
      • Bug 90830 - [bsw bisected regression] GPU hang for spec.arb_gpu_shader5.execution.sampler_array_indexing.vs-nonzero-base
      • + +
      • Bug 90839 - [10.5.5/10.6 regression, bisected] PBO glDrawPixels no longer using blit fastpath
      • + +
      • Bug 90905 - mesa: Finish subdir-objects transition
      • + +
      • Bug 9951 - GL_LINE_SMOOTH and GL_POLYGON_SMOOTH with i965 driver
      • + +
      +

      Changes

      From f9e04413284ce29214527b4d6369c8462000cb3d Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sun, 14 Jun 2015 16:40:00 +0100 Subject: [PATCH 640/834] docs: Add sha256sums for the 10.6.0 release Signed-off-by: Emil Velikov (cherry picked from commit 5d327b373531861f86a726db669b3d656f1b5f8d) --- docs/relnotes/10.6.0.html | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index f0f3afddfba..ebd1f104d6f 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -31,9 +31,10 @@ because compatibility contexts are not supported.

      -

      MD5 checksums

      +

      SHA256 checksums

      -TBD.
      +9bc659abdba26202509304f259723aaa4343dba6aac4bd87d5baea11d23c8c63  mesa-10.6.0.tar.gz
      +f37e2633978deed02ff0522abc36c709586e2b555fd439a82ab71dce2c866c76  mesa-10.6.0.tar.xz
       
      From 061c9bc2042b0686867e4321d94ba18761a6a1a7 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sun, 14 Jun 2015 16:43:21 +0100 Subject: [PATCH 641/834] docs: add news item and link release notes for mesa 10.6.0 Signed-off-by: Emil Velikov --- docs/index.html | 7 +++++++ docs/relnotes.html | 1 + 2 files changed, 8 insertions(+) diff --git a/docs/index.html b/docs/index.html index a88f930c9e0..252242495b9 100644 --- a/docs/index.html +++ b/docs/index.html @@ -16,6 +16,13 @@

      News

      +

      June 14, 2015

      +

      +Mesa 10.6.0 is released. This is a new +development release. See the release notes for more information about +the release. +

      +

      June 07, 2015

      Mesa 10.5.7 is released. diff --git a/docs/relnotes.html b/docs/relnotes.html index 26d196847cb..a037b9684a2 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.

        +
      • 10.6.0 release notes
      • 10.5.7 release notes
      • 10.5.6 release notes
      • 10.5.5 release notes From 2e42deb29c878fb4c52aed6d2d54833aacba18ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20R=C3=BChle?= Date: Sat, 6 Jun 2015 18:37:20 +0200 Subject: [PATCH 642/834] nv50/ir: OP_JOIN is a flow instruction OP_JOIN instructions are assumed to be flow instructions and mercilessly casted to FlowInstruction. This patch fixes an instance where an OP_JOIN is created as a plain instruction. This can cause crashes in the ir printer. [imirkin: add ->fixed = 1] Reviewed-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp index 1ad086094dc..2c7f7e326b2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -887,7 +887,7 @@ NV50LoweringPreSSA::handleTXL(TexInstruction *i) } } bld.setPosition(joinBB, false); - bld.mkOp(OP_JOIN, TYPE_NONE, NULL); + bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; return true; } From 84d27c32d238ca7a7b115bf190e7e527b7f70e92 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Thu, 28 May 2015 14:48:51 -0700 Subject: [PATCH 643/834] i965: Remove break after return MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Anuj Phogat Reviewed-by: Tapani Pälli --- src/mesa/drivers/dri/i965/intel_blit.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 5afc771dea8..d3ab769356c 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -77,13 +77,10 @@ br13_for_cpp(int cpp) switch (cpp) { case 4: return BR13_8888; - break; case 2: return BR13_565; - break; case 1: return BR13_8; - break; default: unreachable("not reached"); } From 
278460279b4e089d51a24fb01dc56dc1e88dcb72 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Mon, 4 May 2015 23:10:28 -0700 Subject: [PATCH 644/834] i965: Check for miptree pitch alignment before using intel_miptree_map_movntdqa() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have an assert() in intel_miptree_map_movntdqa() which expects the pitch to be 16 byte aligned. Signed-off-by: Anuj Phogat Reviewed-by: Tapani Pälli --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 8addcc5010c..593bb9da0d5 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -2630,7 +2630,9 @@ intel_miptree_map(struct brw_context *brw, } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) { intel_miptree_map_blit(brw, mt, map, level, slice); #if defined(USE_SSE41) - } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed && cpu_has_sse4_1) { + } else if (!(mode & GL_MAP_WRITE_BIT) && + !mt->compressed && cpu_has_sse4_1 && + (mt->pitch % 16 == 0)) { intel_miptree_map_movntdqa(brw, mt, map, level, slice); #endif } else { From 6c14b66e40d34104c841ee6dfaeb65617e47be80 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Mon, 1 Jun 2015 09:32:55 -0700 Subject: [PATCH 645/834] meta: Use is_power_of_two() helper function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Anuj Phogat Reviewed-by: Tapani Pälli --- src/mesa/drivers/common/meta_blit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c index bb2164276b2..9cace2b245a 100644 --- a/src/mesa/drivers/common/meta_blit.c +++ b/src/mesa/drivers/common/meta_blit.c @@ -82,7 +82,7 @@ 
setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, y_scale = samples * 0.5; /* We expect only power of 2 samples in source multisample buffer. */ - assert(samples > 0 && (samples & (samples - 1)) == 0); + assert(samples > 0 && is_power_of_two(samples)); while (samples >> (shader_offset + 1)) { shader_offset++; } @@ -263,7 +263,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, } /* We expect only power of 2 samples in source multisample buffer. */ - assert(samples > 0 && (samples & (samples - 1)) == 0); + assert(samples > 0 && is_power_of_two(samples)); while (samples >> (shader_offset + 1)) { shader_offset++; } @@ -434,7 +434,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, * (so the floating point exponent just gets increased), rather than * doing a naive sum and dividing. */ - assert((samples & (samples - 1)) == 0); + assert(is_power_of_two(samples)); /* Fetch each individual sample. */ sample_resolve = rzalloc_size(mem_ctx, 1); for (i = 0; i < samples; i++) { From 82abdf209a2fb5b95b2bae80045aecc61202b13c Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Thu, 30 Apr 2015 23:35:20 -0700 Subject: [PATCH 646/834] mesa: Handle integer formats in need_rgb_to_luminance_conversion() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Anuj Phogat Cc: Reviewed-by: Tapani Pälli Reviewed-by: Iago Toral Quiroga --- src/mesa/main/readpix.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index df46f8361b9..9166a50c22e 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -54,7 +54,10 @@ need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format) return (baseTexFormat == GL_RG || baseTexFormat == GL_RGB || baseTexFormat == GL_RGBA) && - (format == GL_LUMINANCE || format == GL_LUMINANCE_ALPHA); + (format == GL_LUMINANCE || + format == GL_LUMINANCE_ALPHA || + format == GL_LUMINANCE_INTEGER_EXT || + format == 
GL_LUMINANCE_ALPHA_INTEGER_EXT); } From 0b13adcd0802d1ad60f625e7e557d2090a7c143e Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Thu, 30 Apr 2015 23:36:18 -0700 Subject: [PATCH 647/834] mesa: Use helper function need_rgb_to_luminance_conversion() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Anuj Phogat Cc: Reviewed-by: Tapani Pälli Reviewed-by: Iago Toral Quiroga --- src/mesa/main/readpix.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 9166a50c22e..cba9db87d86 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -421,7 +421,7 @@ read_rgba_pixels( struct gl_context *ctx, const struct gl_pixelstore_attrib *packing ) { GLbitfield transferOps; - bool dst_is_integer, dst_is_luminance, needs_rebase; + bool dst_is_integer, convert_rgb_to_lum, needs_rebase; int dst_stride, src_stride, rb_stride; uint32_t dst_format, src_format; GLubyte *dst, *map; @@ -442,10 +442,7 @@ read_rgba_pixels( struct gl_context *ctx, dst_is_integer = _mesa_is_enum_format_integer(format); dst_stride = _mesa_image_row_stride(packing, width, format, type); dst_format = _mesa_format_from_format_and_type(format, type); - dst_is_luminance = format == GL_LUMINANCE || - format == GL_LUMINANCE_ALPHA || - format == GL_LUMINANCE_INTEGER_EXT || - format == GL_LUMINANCE_ALPHA_INTEGER_EXT; + convert_rgb_to_lum = need_rgb_to_luminance_conversion(rb->Format, format); dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height, format, type, 0, 0); @@ -493,7 +490,7 @@ read_rgba_pixels( struct gl_context *ctx, */ assert(!transferOps || (transferOps && !dst_is_integer)); - needs_rgba = transferOps || dst_is_luminance; + needs_rgba = transferOps || convert_rgb_to_lum; rgba = NULL; if (needs_rgba) { uint32_t rgba_format; @@ -566,7 +563,7 @@ read_rgba_pixels( struct gl_context *ctx, * If the dst format is Luminance, we need to do the conversion 
by computing * L=R+G+B values. */ - if (!dst_is_luminance) { + if (!convert_rgb_to_lum) { _mesa_format_convert(dst, dst_format, dst_stride, src, src_format, src_stride, width, height, From ba2b1f8668811eade97a4f134f6df900ff36c8aa Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Fri, 1 May 2015 00:05:18 -0700 Subject: [PATCH 648/834] mesa: Turn need_rgb_to_luminance_conversion() in to a global function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be used by _mesa_meta_pbo_GetTexSubImage() in a later patch. Signed-off-by: Anuj Phogat Cc: Reviewed-by: Tapani Pälli --- src/mesa/main/readpix.c | 11 ++++++----- src/mesa/main/readpix.h | 3 +++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index cba9db87d86..a3357cd6419 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -46,8 +46,8 @@ /** * Return true if the conversion L=R+G+B is needed. */ -static GLboolean -need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format) +GLboolean +_mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format) { GLenum baseTexFormat = _mesa_get_format_base_format(texFormat); @@ -105,7 +105,7 @@ get_readpixels_transfer_ops(const struct gl_context *ctx, mesa_format texFormat, * have any effect anyway. */ if (_mesa_get_format_datatype(texFormat) == GL_UNSIGNED_NORMALIZED && - !need_rgb_to_luminance_conversion(texFormat, format)) { + !_mesa_need_rgb_to_luminance_conversion(texFormat, format)) { transferOps &= ~IMAGE_CLAMP_BIT; } @@ -149,7 +149,7 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format, default: /* Color formats. 
*/ - if (need_rgb_to_luminance_conversion(rb->Format, format)) { + if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format)) { return GL_TRUE; } @@ -442,7 +442,8 @@ read_rgba_pixels( struct gl_context *ctx, dst_is_integer = _mesa_is_enum_format_integer(format); dst_stride = _mesa_image_row_stride(packing, width, format, type); dst_format = _mesa_format_from_format_and_type(format, type); - convert_rgb_to_lum = need_rgb_to_luminance_conversion(rb->Format, format); + convert_rgb_to_lum = + _mesa_need_rgb_to_luminance_conversion(rb->Format, format); dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height, format, type, 0, 0); diff --git a/src/mesa/main/readpix.h b/src/mesa/main/readpix.h index 4bb35e17e4d..1636dd9ce3e 100644 --- a/src/mesa/main/readpix.h +++ b/src/mesa/main/readpix.h @@ -37,6 +37,9 @@ extern GLboolean _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format, GLenum type, GLboolean uses_blit); +extern GLboolean +_mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format); + extern void _mesa_readpixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei height, From a4ff47ade9d95a27c9c55afbf6dd77d3f3b10562 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 12 May 2015 04:17:04 -0700 Subject: [PATCH 649/834] meta: Abort meta path if ReadPixels need rgb to luminance conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After recent addition of pbo testing in piglit test getteximage-luminance, it fails on i965. This patch makes a sub test pass. 
Signed-off-by: Anuj Phogat Cc: Reviewed-by: Tapani Pälli --- src/mesa/drivers/common/meta_tex_subimage.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c index ad6e7873ecd..6bd74e126f2 100644 --- a/src/mesa/drivers/common/meta_tex_subimage.c +++ b/src/mesa/drivers/common/meta_tex_subimage.c @@ -34,6 +34,7 @@ #include "macros.h" #include "meta.h" #include "pbo.h" +#include "readpix.h" #include "shaderapi.h" #include "state.h" #include "teximage.h" @@ -257,6 +258,7 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 }; int full_height, image_height; struct gl_texture_image *pbo_tex_image; + struct gl_renderbuffer *rb = NULL; GLenum status; bool success = false; int z; @@ -273,6 +275,13 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, if (ctx->_ImageTransferState) return false; + + if (!tex_image) { + rb = ctx->ReadBuffer->_ColorReadBuffer; + if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format)) + return false; + } + /* For arrays, use a tall (height * depth) 2D texture but taking into * account the inter-image padding specified with the image height packing * property. From 8e9eec5cbf73bf977bc7e808a4e653737ee94c38 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Fri, 15 May 2015 06:01:15 -0700 Subject: [PATCH 650/834] meta: Abort texture upload if pixels == null and no pixel unpack buffer set in case of glTexImage{1,2,3}D(). Texture has already been allocated at this point and we have no data to upload. With out this patch, with create_pbo = true, we end up creating a temporary pbo and then uploading uninitialzed texture data. 
Signed-off-by: Anuj Phogat Reviewed-by: Neil Roberts --- src/mesa/drivers/common/meta_tex_subimage.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c index 6bd74e126f2..d2474f52718 100644 --- a/src/mesa/drivers/common/meta_tex_subimage.c +++ b/src/mesa/drivers/common/meta_tex_subimage.c @@ -151,7 +151,8 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, bool success = false; int z; - if (!_mesa_is_bufferobj(packing->BufferObj) && !create_pbo) + if (!_mesa_is_bufferobj(packing->BufferObj) && + (!create_pbo || pixels == NULL)) return false; if (format == GL_DEPTH_COMPONENT || From 71aaf62fca3ed8b18fc2dcd69be0fd6bb7e58a91 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Jun 2015 16:47:25 -0700 Subject: [PATCH 651/834] egl/dri2: Fix Android Lollipop build on ARM. Reviewed-by: Emil Velikov --- src/egl/drivers/dri2/Android.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/src/egl/drivers/dri2/Android.mk b/src/egl/drivers/dri2/Android.mk index d4d809bc3de..109e4d4a0d8 100644 --- a/src/egl/drivers/dri2/Android.mk +++ b/src/egl/drivers/dri2/Android.mk @@ -36,6 +36,7 @@ LOCAL_CFLAGS := \ -DHAVE_ANDROID_PLATFORM ifeq ($(MESA_LOLLIPOP_BUILD),true) +LOCAL_CFLAGS_arm := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\" LOCAL_CFLAGS_x86 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\" LOCAL_CFLAGS_x86_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\" else From fd3234891f7203d6b2b0992c34e880df325f75ea Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Jun 2015 11:57:46 -0700 Subject: [PATCH 652/834] gallium: Enable build of NIR support on Android. v2: Add a comment explaining why we link libmesa_glsl. 
Reviewed-by: Emil Velikov --- src/gallium/auxiliary/Android.mk | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk index 2d91752595c..86430eb6a21 100644 --- a/src/gallium/auxiliary/Android.mk +++ b/src/gallium/auxiliary/Android.mk @@ -30,6 +30,7 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := \ $(C_SOURCES) \ + $(NIR_SOURCES) \ $(VL_STUB_SOURCES) LOCAL_C_INCLUDES := \ @@ -43,7 +44,9 @@ LOCAL_SRC_FILES += \ LOCAL_CPPFLAGS := -std=c++11 endif +# We need libmesa_glsl to get NIR's generated include directories. LOCAL_MODULE := libmesa_gallium +LOCAL_STATIC_LIBRARIES += libmesa_glsl # generate sources LOCAL_MODULE_CLASS := STATIC_LIBRARIES From 6ce0b0e31754d88a542d4e3c90062e3f6a67f7b9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 3 Jun 2015 10:15:31 -0700 Subject: [PATCH 653/834] vc4: Add support for building on Android. v2: Add a comment explaining why we link libmesa_glsl. Drop warning option from freedreno. Add vc4 to the documentation for BOARD_GPU_DRIVERS. Reviewed-by: Emil Velikov --- Android.mk | 4 +-- src/gallium/Android.mk | 5 ++++ src/gallium/drivers/vc4/Android.mk | 37 +++++++++++++++++++++++++++ src/gallium/targets/dri/Android.mk | 4 +++ src/gallium/winsys/vc4/drm/Android.mk | 34 ++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 src/gallium/drivers/vc4/Android.mk create mode 100644 src/gallium/winsys/vc4/drm/Android.mk diff --git a/Android.mk b/Android.mk index 341978a68c6..69e0d33f1aa 100644 --- a/Android.mk +++ b/Android.mk @@ -24,7 +24,7 @@ # BOARD_GPU_DRIVERS should be defined. The valid values are # # classic drivers: i915 i965 -# gallium drivers: swrast freedreno i915g ilo nouveau r300g r600g radeonsi vmwgfx +# gallium drivers: swrast freedreno i915g ilo nouveau r300g r600g radeonsi vc4 vmwgfx # # The main target is libGLES_mesa. For each classic driver enabled, a DRI # module will also be built. 
DRI modules will be loaded by libGLES_mesa. @@ -48,7 +48,7 @@ MESA_PYTHON2 := python DRM_GRALLOC_TOP := hardware/drm_gralloc classic_drivers := i915 i965 -gallium_drivers := swrast freedreno i915g ilo nouveau r300g r600g radeonsi vmwgfx +gallium_drivers := swrast freedreno i915g ilo nouveau r300g r600g radeonsi vmwgfx vc4 MESA_GPU_DRIVERS := $(strip $(BOARD_GPU_DRIVERS)) diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk index a9c34d9146c..b946681840c 100644 --- a/src/gallium/Android.mk +++ b/src/gallium/Android.mk @@ -76,6 +76,11 @@ endif endif endif +# vc4 +ifneq ($(filter vc4, $(MESA_GPU_DRIVERS)),) +SUBDIRS += winsys/vc4/drm drivers/vc4 +endif + # vmwgfx ifneq ($(filter vmwgfx, $(MESA_GPU_DRIVERS)),) SUBDIRS += winsys/svga/drm drivers/svga diff --git a/src/gallium/drivers/vc4/Android.mk b/src/gallium/drivers/vc4/Android.mk new file mode 100644 index 00000000000..f42a152aa8c --- /dev/null +++ b/src/gallium/drivers/vc4/Android.mk @@ -0,0 +1,37 @@ +# Copyright (C) 2014 Emil Velikov +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(C_SOURCES) + +LOCAL_SHARED_LIBRARIES := libdrm +# We need libmesa_glsl to get NIR's generated include directories. +LOCAL_STATIC_LIBRARIES := libmesa_glsl +LOCAL_MODULE := libmesa_pipe_vc4 + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index 1772d250b4b..5ba129b7961 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -90,6 +90,10 @@ ifneq ($(filter swrast,$(MESA_GPU_DRIVERS)),) gallium_DRIVERS += libmesa_pipe_softpipe libmesa_winsys_sw_dri libmesa_winsys_sw_kms_dri LOCAL_CFLAGS += -DGALLIUM_SOFTPIPE endif +ifneq ($(filter vc4,$(MESA_GPU_DRIVERS)),) +LOCAL_CFLAGS += -DGALLIUM_VC4 +gallium_DRIVERS += libmesa_winsys_vc4 libmesa_pipe_vc4 +endif ifneq ($(filter vmwgfx,$(MESA_GPU_DRIVERS)),) gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga LOCAL_CFLAGS += -DGALLIUM_VMWGFX diff --git a/src/gallium/winsys/vc4/drm/Android.mk b/src/gallium/winsys/vc4/drm/Android.mk new file mode 100644 index 00000000000..55edc179aaa --- /dev/null +++ b/src/gallium/winsys/vc4/drm/Android.mk @@ -0,0 +1,34 @@ +# Copyright (C) 2014 Emil Velikov +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is 
furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_SHARED_LIBRARIES := libdrm +LOCAL_MODULE := libmesa_winsys_vc4 + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) From bcd8a64f32f6387cbd8ed8d0bda0f49bd7dd4251 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Jun 2015 11:45:05 -0700 Subject: [PATCH 654/834] gallium: Drop the gallium-specific Android sw winsys. This was part of gallium_egl, and we now have the normal libEGL Android winsys support to handle it. 
Reviewed-by: Emil Velikov --- src/gallium/winsys/sw/android/Android.mk | 34 --- .../winsys/sw/android/android_sw_winsys.cpp | 264 ------------------ .../winsys/sw/android/android_sw_winsys.h | 48 ---- 3 files changed, 346 deletions(-) delete mode 100644 src/gallium/winsys/sw/android/Android.mk delete mode 100644 src/gallium/winsys/sw/android/android_sw_winsys.cpp delete mode 100644 src/gallium/winsys/sw/android/android_sw_winsys.h diff --git a/src/gallium/winsys/sw/android/Android.mk b/src/gallium/winsys/sw/android/Android.mk deleted file mode 100644 index 4fb2715a56c..00000000000 --- a/src/gallium/winsys/sw/android/Android.mk +++ /dev/null @@ -1,34 +0,0 @@ -# Mesa 3-D graphics library -# -# Copyright (C) 2010-2011 Chia-I Wu -# Copyright (C) 2010-2011 LunarG Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -LOCAL_PATH := $(call my-dir) - -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := \ - android_sw_winsys.cpp - -LOCAL_MODULE := libmesa_winsys_sw_android - -include $(GALLIUM_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.cpp b/src/gallium/winsys/sw/android/android_sw_winsys.cpp deleted file mode 100644 index 4b1040cb6ee..00000000000 --- a/src/gallium/winsys/sw/android/android_sw_winsys.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2010-2011 LunarG Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chia-I Wu - */ - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" -#include "util/u_memory.h" -#include "util/u_format.h" -#include "state_tracker/sw_winsys.h" - -#include -#include - -#if ANDROID_VERSION < 0x0300 -#include -#endif - -#include "android_sw_winsys.h" - -struct android_sw_winsys -{ - struct sw_winsys base; - - const gralloc_module_t *grmod; -}; - -struct android_sw_displaytarget -{ - buffer_handle_t handle; - int stride; - int width, height; - int usage; /* gralloc usage */ - - void *mapped; -}; - -static INLINE struct android_sw_winsys * -android_sw_winsys(struct sw_winsys *ws) -{ - return (struct android_sw_winsys *) ws; -} - -static INLINE struct android_sw_displaytarget * -android_sw_displaytarget(struct sw_displaytarget *dt) -{ - return (struct android_sw_displaytarget *) dt; -} - -namespace android { - -static void -android_displaytarget_display(struct sw_winsys *ws, - struct sw_displaytarget *dt, - void *context_private, - struct pipe_box *box) -{ -} - -static struct sw_displaytarget * -android_displaytarget_create(struct sw_winsys *ws, - unsigned tex_usage, - enum pipe_format format, - unsigned width, unsigned height, - unsigned alignment, - unsigned *stride) -{ - return NULL; -} - -static void -android_displaytarget_destroy(struct sw_winsys *ws, - struct sw_displaytarget *dt) -{ - struct android_sw_displaytarget *adt = android_sw_displaytarget(dt); - - assert(!adt->mapped); - FREE(adt); -} - -static void -android_displaytarget_unmap(struct sw_winsys *ws, - struct sw_displaytarget *dt) -{ - struct android_sw_winsys *droid = android_sw_winsys(ws); - struct android_sw_displaytarget *adt = android_sw_displaytarget(dt); - -#if ANDROID_VERSION < 0x0300 - /* try sw_gralloc first */ - if (adt->mapped && sw_gralloc_handle_t::validate(adt->handle) >= 0) { - adt->mapped = NULL; - return; - } -#endif - - if (adt->mapped) { - droid->grmod->unlock(droid->grmod, adt->handle); - adt->mapped = NULL; - } -} - -static void * 
-android_displaytarget_map(struct sw_winsys *ws, - struct sw_displaytarget *dt, - unsigned flags) -{ - struct android_sw_winsys *droid = android_sw_winsys(ws); - struct android_sw_displaytarget *adt = android_sw_displaytarget(dt); - -#if ANDROID_VERSION < 0x0300 - /* try sw_gralloc first */ - if (sw_gralloc_handle_t::validate(adt->handle) >= 0) { - const sw_gralloc_handle_t *swhandle = - reinterpret_cast(adt->handle); - adt->mapped = reinterpret_cast(swhandle->base); - - return adt->mapped; - } -#endif - - if (!adt->mapped) { - /* lock the buffer for CPU access */ - droid->grmod->lock(droid->grmod, adt->handle, - adt->usage, 0, 0, adt->width, adt->height, &adt->mapped); - } - - return adt->mapped; -} - -static struct sw_displaytarget * -android_displaytarget_from_handle(struct sw_winsys *ws, - const struct pipe_resource *templ, - struct winsys_handle *whandle, - unsigned *stride) -{ - struct android_winsys_handle *ahandle = - (struct android_winsys_handle *) whandle; - struct android_sw_displaytarget *adt; - - adt = CALLOC_STRUCT(android_sw_displaytarget); - if (!adt) - return NULL; - - adt->handle = ahandle->handle; - adt->stride = ahandle->stride; - adt->width = templ->width0; - adt->height = templ->height0; - - if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_TRANSFER_WRITE)) - adt->usage |= GRALLOC_USAGE_SW_WRITE_OFTEN; - if (templ->bind & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_TRANSFER_READ)) - adt->usage |= GRALLOC_USAGE_SW_READ_OFTEN; - - if (stride) - *stride = adt->stride; - - return reinterpret_cast(adt); -} - -static boolean -android_displaytarget_get_handle(struct sw_winsys *ws, - struct sw_displaytarget *dt, - struct winsys_handle *whandle) -{ - return FALSE; -} - -static boolean -android_is_displaytarget_format_supported(struct sw_winsys *ws, - unsigned tex_usage, - enum pipe_format format) -{ - struct android_sw_winsys *droid = android_sw_winsys(ws); - int fmt = -1; - - switch (format) { - case PIPE_FORMAT_R8G8B8A8_UNORM: - fmt = 
HAL_PIXEL_FORMAT_RGBA_8888; - break; - case PIPE_FORMAT_R8G8B8X8_UNORM: - fmt = HAL_PIXEL_FORMAT_RGBX_8888; - break; - case PIPE_FORMAT_R8G8B8_UNORM: - fmt = HAL_PIXEL_FORMAT_RGB_888; - break; - case PIPE_FORMAT_B5G6R5_UNORM: - fmt = HAL_PIXEL_FORMAT_RGB_565; - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - fmt = HAL_PIXEL_FORMAT_BGRA_8888; - break; - default: - break; - } - - return (fmt != -1); -} - -static void -android_destroy(struct sw_winsys *ws) -{ - struct android_sw_winsys *droid = android_sw_winsys(ws); - - FREE(droid); -} - -}; /* namespace android */ - -using namespace android; - -struct sw_winsys * -android_create_sw_winsys(void) -{ - struct android_sw_winsys *droid; - const hw_module_t *mod; - - droid = CALLOC_STRUCT(android_sw_winsys); - if (!droid) - return NULL; - - if (hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod)) { - FREE(droid); - return NULL; - } - - droid->grmod = (const gralloc_module_t *) mod; - - droid->base.destroy = android_destroy; - droid->base.is_displaytarget_format_supported = - android_is_displaytarget_format_supported; - - droid->base.displaytarget_create = android_displaytarget_create; - droid->base.displaytarget_destroy = android_displaytarget_destroy; - droid->base.displaytarget_from_handle = android_displaytarget_from_handle; - droid->base.displaytarget_get_handle = android_displaytarget_get_handle; - - droid->base.displaytarget_map = android_displaytarget_map; - droid->base.displaytarget_unmap = android_displaytarget_unmap; - droid->base.displaytarget_display = android_displaytarget_display; - - return &droid->base; -} diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.h b/src/gallium/winsys/sw/android/android_sw_winsys.h deleted file mode 100644 index 24c85edec0e..00000000000 --- a/src/gallium/winsys/sw/android/android_sw_winsys.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2010-2011 LunarG Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Chia-I Wu - */ - -#ifndef ANDROID_SW_WINSYS -#define ANDROID_SW_WINSYS - -#include -#include - -__BEGIN_DECLS - -struct sw_winsys; - -struct android_winsys_handle { - buffer_handle_t handle; - int stride; -}; - -struct sw_winsys * -android_create_sw_winsys(void); - -__END_DECLS - -#endif /* ANDROID_SW_WINSYS */ From 932d1613d1e15ec22555e5ec09105c49eb850e36 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Jun 2015 12:16:19 -0700 Subject: [PATCH 655/834] egl: Drop check for driver != NULL. Back in 2013, a patch was added (with 2 reviewers!) at the end of the block to early exit the loop in this case, without noticing that the loop already did. I added another early exit case, again without noticing, but Rob caught me. Just drop the loop condition that apparently surprises most of us, instead of leaving the end of the loop conspicuously not exiting on success. 
Reviewed-by: Ian Romanick Reviewed-by: Rob Clark --- src/egl/drivers/dri2/egl_dri2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 44a6c96ae9a..dceb9a011d9 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -397,7 +397,7 @@ dri2_open_driver(_EGLDisplay *disp) dri2_dpy->driver = NULL; end = search_paths + strlen(search_paths); - for (p = search_paths; p < end && dri2_dpy->driver == NULL; p = next + 1) { + for (p = search_paths; p < end; p = next + 1) { int len; next = strchr(p, ':'); if (next == NULL) From a2af42c1d2dc91f4c31e25ff9fff15a89a9b6ead Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 12 Jun 2015 16:09:05 +0200 Subject: [PATCH 656/834] nvc0/ir: fix collection of first uses for texture barrier insertion One of the places we have to insert texbars is in situations where the result of the tex gets overwritten by a different instruction (e.g. in a conditional statement). However in some situations it can actually appear as though the original tex itself is an overwriting instruction. This can naturally never really happen, so just ignore the tex instruction when it comes up. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90347 Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 411e2de1b11..7a5d1ce0299 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -168,7 +168,7 @@ NVC0LegalizePostRA::insnDominatedBy(const Instruction *later, void NVC0LegalizePostRA::addTexUse(std::list &uses, - Instruction *usei, const Instruction *insn) + Instruction *usei, const Instruction *texi) { bool add = true; for (std::list::iterator it = uses.begin(); @@ -183,7 +183,7 @@ NVC0LegalizePostRA::addTexUse(std::list &uses, ++it; } if (add) - uses.push_back(TexUse(usei, insn)); + uses.push_back(TexUse(usei, texi)); } void @@ -195,7 +195,8 @@ NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0))) insn = insn->getSrc(0)->getUniqueInsn(); - if (!insn->bb->reachableBy(texi->bb, term)) + // NOTE: the tex itself is, of course, not an overwriting definition + if (insn == texi || !insn->bb->reachableBy(texi->bb, term)) return; switch (insn->op) { @@ -243,7 +244,12 @@ NVC0LegalizePostRA::findFirstUses( visited.insert(usei); if (usei->op == OP_PHI || usei->op == OP_UNION) { - // need a barrier before WAW cases + // need a barrier before WAW cases, like: + // %r0 = tex + // if ... 
+ // texbar <- is required or tex might replace x again + // %r1 = x <- overwriting def + // %r2 = phi %r0, %r1 for (int s = 0; usei->srcExists(s); ++s) { Instruction *defi = usei->getSrc(s)->getUniqueInsn(); if (defi && &usei->src(s) != *u) @@ -262,7 +268,7 @@ NVC0LegalizePostRA::findFirstUses( usei->subOp != NV50_IR_SUBOP_MOV_FINAL) { findFirstUses(texi, usei, uses, visited); } else { - addTexUse(uses, usei, insn); + addTexUse(uses, usei, texi); } } } From 8b24388647f626a5cad10fd48e61335ed26a8560 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 15 Jun 2015 15:48:58 -0400 Subject: [PATCH 657/834] nv50,nvc0: clamp uniform size to 64k The state tracker will pass through requests from buggy applications which will have the buffer size larger than the max allowed (64k). Clamp the size to 64k so that we don't get errors when uploading the constbuf data. Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nv50/nv50_state.c | 4 ++-- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 290750459cf..d4d41af3c61 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -811,12 +811,12 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? 
TRUE : FALSE; if (nv50->constbuf[s][i].user) { nv50->constbuf[s][i].u.data = cb->user_buffer; - nv50->constbuf[s][i].size = cb->buffer_size; + nv50->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000); nv50->constbuf_valid[s] |= 1 << i; } else if (res) { nv50->constbuf[s][i].offset = cb->buffer_offset; - nv50->constbuf[s][i].size = align(cb->buffer_size, 0x100); + nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000); nv50->constbuf_valid[s] |= 1 << i; } else { nv50->constbuf_valid[s] &= ~(1 << i); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 63c3c52a5b2..6b7a211e71b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -793,12 +793,12 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE; if (nvc0->constbuf[s][i].user) { nvc0->constbuf[s][i].u.data = cb->user_buffer; - nvc0->constbuf[s][i].size = cb->buffer_size; + nvc0->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000); nvc0->constbuf_valid[s] |= 1 << i; } else if (cb) { nvc0->constbuf[s][i].offset = cb->buffer_offset; - nvc0->constbuf[s][i].size = align(cb->buffer_size, 0x100); + nvc0->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000); nvc0->constbuf_valid[s] |= 1 << i; } else { From 1a6220b416f02e56575894efbbd1717c9427c763 Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Thu, 11 Jun 2015 16:59:07 +0100 Subject: [PATCH 658/834] i965: Fix aligning to the block size in intel_miptree_copy_slice This function was trying to align the width and height to a multiple of the block size for compressed textures. It was using align_w/h as a shortcut to get the block size as up until Gen9 this always happens to match. 
However in Gen9+ the alignment values are expressed as multiples of the block size so in effect the alignment values are always 4 for compressed textures as that is the minimum value we can pick. This happened to work for most compressed formats because the block size is also 4, but for FXT1 this was breaking because it has a block width of 8. This fixes some Piglit tests testing FXT1 such as spec@3dfx_texture_compression_fxt1@fbo-generatemipmap-formats Reviewed-by: Nanley Chery --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 593bb9da0d5..80c52f2feef 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -1211,8 +1211,10 @@ intel_miptree_copy_slice(struct brw_context *brw, assert(src_mt->format == dst_mt->format); if (dst_mt->compressed) { - height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h; - width = ALIGN(width, dst_mt->align_w); + unsigned int i, j; + _mesa_get_format_block_size(dst_mt->format, &i, &j); + height = ALIGN(height, j) / j; + width = ALIGN(width, i); } /* If it's a packed depth/stencil buffer with separate stencil, the blit From 7d88ab42b9dda825feddbae774a2a48ddf3cbec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Tue, 16 Jun 2015 13:46:47 +0300 Subject: [PATCH 659/834] mesa: set override_version per api version override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before 9b5e92f get_gl_override was called only once, but now it is called for multiple APIs (GLES2, GL), version needs to be set always. 
Signed-off-by: Tapani Pälli Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90797 Reviewed-by: Jordan Justen Reviewed-by: Martin Peres Tested-by: Martin Peres --- src/mesa/main/version.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 60c76040e2a..8bc00ace5c4 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -58,34 +58,44 @@ get_gl_override(gl_api api, int *version, bool *fwd_context, ? "MESA_GL_VERSION_OVERRIDE" : "MESA_GLES_VERSION_OVERRIDE"; const char *version_str; int major, minor, n; - static int override_version = -1; - static bool fc_suffix = false; - static bool compat_suffix = false; + static struct override_info { + int version; + bool fc_suffix; + bool compat_suffix; + } override[] = { + { -1, false, false}, + { -1, false, false}, + { -1, false, false}, + { -1, false, false}, + }; + + STATIC_ASSERT(ARRAY_SIZE(override) == API_OPENGL_LAST + 1); if (api == API_OPENGLES) goto exit; - if (override_version < 0) { - override_version = 0; + if (override[api].version < 0) { + override[api].version = 0; version_str = getenv(env_var); if (version_str) { - fc_suffix = check_for_ending(version_str, "FC"); - compat_suffix = check_for_ending(version_str, "COMPAT"); + override[api].fc_suffix = check_for_ending(version_str, "FC"); + override[api].compat_suffix = check_for_ending(version_str, "COMPAT"); n = sscanf(version_str, "%u.%u", &major, &minor); if (n != 2) { fprintf(stderr, "error: invalid value for %s: %s\n", env_var, version_str); - override_version = 0; + override[api].version = 0; } else { - override_version = major * 10 + minor; + override[api].version = major * 10 + minor; /* There is no such thing as compatibility or forward-compatible for * OpenGL ES 2.0 or 3.x APIs. 
*/ - if ((override_version < 30 && fc_suffix) || - (api == API_OPENGLES2 && (fc_suffix || compat_suffix))) { + if ((override[api].version < 30 && override[api].fc_suffix) || + (api == API_OPENGLES2 && (override[api].fc_suffix || + override[api].compat_suffix))) { fprintf(stderr, "error: invalid value for %s: %s\n", env_var, version_str); } @@ -94,9 +104,9 @@ get_gl_override(gl_api api, int *version, bool *fwd_context, } exit: - *version = override_version; - *fwd_context = fc_suffix; - *compat_context = compat_suffix; + *version = override[api].version; + *fwd_context = override[api].fc_suffix; + *compat_context = override[api].compat_suffix; } /** From da6996485f2ca636218e3d83b53cf6a450bb9b38 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Mon, 15 Jun 2015 21:00:47 +1000 Subject: [PATCH 660/834] Revert "glsl: remove restriction on unsized arrays in GLSL ES 3.10" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit adee54f8269c5e9f4fde91d19f0e465afc8f14d8. Further down in the GLSL ES 3.10 spec it says: "If an array is declared as the last member of a shader storage block and the size is not specified at compile-time, it is sized at run-time. In all other cases, arrays are sized only at compile-time." Reviewed-by: Samuel Iglesias Gonsálvez --- src/glsl/ast_to_hir.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index fc24305b244..259e01e0ca4 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -3943,15 +3943,7 @@ ast_declarator_list::hir(exec_list *instructions, decl->identifier); } - /* GLSL ES 3.10 removes the restriction on unsized arrays. - * - * Section 4.1.9 (Arrays) of the GLSL ES 3.10 spec says: - * - * "Variables of the same type can be aggregated into arrays by - * declaring a name followed by brackets ([ ]) enclosing an - * optional size."
- */ - if (state->es_shader && state->language_version < 310) { + if (state->es_shader) { const glsl_type *const t = (earlier == NULL) ? var->type : earlier->type; From 797f4eacea8a6b08b7c9143a74c7f2b422d1535d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 6 Jun 2015 13:24:11 +0200 Subject: [PATCH 661/834] configure.ac: rename LLVM_VERSION_PATCH to avoid conflict with llvm-config.h Reviewed-by: Tom Stellard --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 34d1ac988f6..e6d947e5fc3 100644 --- a/configure.ac +++ b/configure.ac @@ -1929,7 +1929,7 @@ if test "x$enable_gallium_llvm" = xyes; then LLVM_COMPONENTS="${LLVM_COMPONENTS} all-targets ipo linker instrumentation" LLVM_COMPONENTS="${LLVM_COMPONENTS} irreader option objcarcopts profiledata" fi - DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT -DLLVM_VERSION_PATCH=$LLVM_VERSION_PATCH" + DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT -DMESA_LLVM_VERSION_PATCH=$LLVM_VERSION_PATCH" MESA_LLVM=1 dnl Check for Clang internal headers From 2f86c22e75a3273a7541f88ffedd2edefaf6f482 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 19 Mar 2015 23:28:25 +0100 Subject: [PATCH 662/834] glsl: print locations of variables Reviewed-by: Brian Paul --- src/glsl/ir_print_visitor.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp index f5de6ac065b..c991658ab4b 100644 --- a/src/glsl/ir_print_visitor.cpp +++ b/src/glsl/ir_print_visitor.cpp @@ -161,6 +161,10 @@ void ir_print_visitor::visit(ir_variable *ir) { fprintf(f, "(declare "); + char loc[256] = {0}; + if (ir->data.location != -1) + snprintf(loc, sizeof(loc), "location=%i ", ir->data.location); + const char *const cent = (ir->data.centroid) ? "centroid " : ""; const char *const samp = (ir->data.sample) ? 
"sample " : ""; const char *const inv = (ir->data.invariant) ? "invariant " : ""; @@ -172,8 +176,8 @@ void ir_print_visitor::visit(ir_variable *ir) const char *const interp[] = { "", "smooth", "flat", "noperspective" }; STATIC_ASSERT(ARRAY_SIZE(interp) == INTERP_QUALIFIER_COUNT); - fprintf(f, "(%s%s%s%s%s%s) ", - cent, samp, inv, mode[ir->data.mode], + fprintf(f, "(%s%s%s%s%s%s%s) ", + loc, cent, samp, inv, mode[ir->data.mode], stream[ir->data.stream], interp[ir->data.interpolation]); From fa49536ab10748f6ab05e930d4b01fe714ea6b59 Mon Sep 17 00:00:00 2001 From: Chris Forbes Date: Sun, 7 Sep 2014 19:24:15 +1200 Subject: [PATCH 663/834] glsl: add ir reader support for ir_barrier Picked from the tessellation branch. Reviewed-by: Brian Paul --- src/glsl/ir_reader.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp index fd318c046e2..4eae4131c57 100644 --- a/src/glsl/ir_reader.cpp +++ b/src/glsl/ir_reader.cpp @@ -63,6 +63,7 @@ private: ir_texture *read_texture(s_expression *); ir_emit_vertex *read_emit_vertex(s_expression *); ir_end_primitive *read_end_primitive(s_expression *); + ir_barrier *read_barrier(s_expression *); ir_dereference *read_dereference(s_expression *); ir_dereference_variable *read_var_ref(s_expression *); @@ -375,6 +376,8 @@ ir_reader::read_instruction(s_expression *expr, ir_loop *loop_ctx) inst = read_emit_vertex(list); } else if (strcmp(tag->value(), "end-primitive") == 0) { inst = read_end_primitive(list); + } else if (strcmp(tag->value(), "barrier") == 0) { + inst = read_barrier(list); } else { inst = read_rvalue(list); if (inst == NULL) @@ -1142,3 +1145,15 @@ ir_reader::read_end_primitive(s_expression *expr) ir_read_error(NULL, "when reading end-primitive"); return NULL; } + +ir_barrier * +ir_reader::read_barrier(s_expression *expr) +{ + s_pattern pat[] = { "barrier" }; + + if (MATCH(expr, pat)) { + return new(mem_ctx) ir_barrier(); + } + ir_read_error(NULL, "when reading 
barrier"); + return NULL; +} From 8af11afc38532c65a242f7d45c31cf098ce2fa2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 13 Jun 2015 13:02:20 +0200 Subject: [PATCH 664/834] mesa: remove unused variables from gl_program Reviewed-by: Brian Paul --- src/mesa/main/mtypes.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index e67e8074a88..205c7d2fbee 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2097,8 +2097,6 @@ struct gl_program GLbitfield64 DoubleInputsRead; /**< Bitmask of which input regs are read and are doubles */ GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */ GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */ - GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */ - GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */ GLbitfield TexturesUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; /**< TEXTURE_x_BIT bitmask */ GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */ GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */ From 358b6bb7a71663ab5e1c62f2b7767c20acebc2fa Mon Sep 17 00:00:00 2001 From: Chris Forbes Date: Sun, 21 Sep 2014 12:07:55 +1200 Subject: [PATCH 665/834] mesa: generalize sso stage interleaving check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For tessellation. 
v2: cleanup by Marek Olšák Reviewed-by: Brian Paul --- src/mesa/main/pipelineobj.c | 55 +++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index 0fefa7d568b..b4795ffe46b 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -673,6 +673,38 @@ program_stages_all_active(struct gl_pipeline_object *pipe, return status; } +static bool +program_stages_interleaved_illegally(const struct gl_pipeline_object *pipe) +{ + struct gl_shader_program *prev = NULL; + unsigned i, j; + + /* Look for programs bound to stages: A -> B -> A, with any intervening + * sequence of unrelated programs or empty stages. + */ + for (i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader_program *cur = pipe->CurrentProgram[i]; + + /* Empty stages anywhere in the pipe are OK */ + if (!cur || cur == prev) + continue; + + if (prev) { + /* We've seen an A -> B transition; look at the rest of the pipe + * to see if we ever see A again. + */ + for (j = i + 1; j < MESA_SHADER_STAGES; j++) { + if (pipe->CurrentProgram[j] == prev) + return true; + } + } + + prev = cur; + } + + return false; +} + extern GLboolean _mesa_validate_program_pipeline(struct gl_context* ctx, struct gl_pipeline_object *pipe, @@ -721,24 +753,13 @@ _mesa_validate_program_pipeline(struct gl_context* ctx, * - One program object is active for at least two shader stages * and a second program is active for a shader stage between two * stages for which the first program was active." - * - * Without Tesselation, the only case where this can occur is the geometry - * shader between the fragment shader and vertex shader. 
*/ - if (pipe->CurrentProgram[MESA_SHADER_GEOMETRY] - && pipe->CurrentProgram[MESA_SHADER_FRAGMENT] - && pipe->CurrentProgram[MESA_SHADER_VERTEX]) { - if (pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name == pipe->CurrentProgram[MESA_SHADER_FRAGMENT]->Name && - pipe->CurrentProgram[MESA_SHADER_GEOMETRY]->Name != pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name) { - pipe->InfoLog = - ralloc_asprintf(pipe, - "Program %d is active for geometry stage between " - "two stages for which another program %d is " - "active", - pipe->CurrentProgram[MESA_SHADER_GEOMETRY]->Name, - pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name); - goto err; - } + if (program_stages_interleaved_illegally(pipe)) { + pipe->InfoLog = + ralloc_strdup(pipe, + "Program is active for multiple shader stages with an " + "intervening stage provided by another program"); + goto err; } /* Section 2.11.11 (Shader Execution), subheading "Validation," of the From 42a3c1ec8471fc76eb6d3d6f1bd1739e24a5f33a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 14 Jun 2015 16:37:02 +0200 Subject: [PATCH 666/834] mesa: don't rebind constant buffers after every state change if GS is active Reviewed-by: Brian Paul --- src/mesa/main/state.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 5b970081a3f..bede7fe1d0e 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -266,15 +266,9 @@ update_program_constants(struct gl_context *ctx) } } - if (ctx->GeometryProgram._Current) { - const struct gl_program_parameter_list *params = - ctx->GeometryProgram._Current->Base.Parameters; - /*FIXME: StateFlags is always 0 because we have unnamed constant - * not state changes */ - if (params /*&& params->StateFlags & ctx->NewState*/) { - new_state |= _NEW_PROGRAM_CONSTANTS; - } - } + /* Don't handle geometry shaders here. They don't use any state + * constants. 
+ */ if (ctx->VertexProgram._Current) { const struct gl_program_parameter_list *params = From aab55b0bc6086a032f44c99ad6569ea2eac128ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 7 Apr 2015 01:10:17 +0200 Subject: [PATCH 667/834] st/mesa: improve assertions in vp/fp translation Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_program.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index d5a124f2188..fa792bc349b 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -286,7 +286,8 @@ st_prepare_vertex_program(struct gl_context *ctx, /* fall through */ case VARYING_SLOT_VAR0: default: - assert(attr < VARYING_SLOT_MAX); + assert(attr >= VARYING_SLOT_VAR0 || + (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7)); stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; stvp->output_semantic_index[slot] = st_get_generic_varying_index(st, attr); @@ -663,7 +664,8 @@ st_translate_fragment_program(struct st_context *st, * consumed for the TEXi varyings, and we can base the locations of * the user varyings on VAR0. Otherwise, we use TEX0 as base index. */ - assert(attr >= VARYING_SLOT_TEX0); + assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC || + (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7)); input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; input_semantic_index[slot] = st_get_generic_varying_index(st, attr); if (attr == VARYING_SLOT_PNTC) From c753866cc4ae15313430f9b6edba1b82e44b003a Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Thu, 28 May 2015 19:35:44 +0100 Subject: [PATCH 668/834] i965/vec4: Fix the source register for indexed samplers Previously when setting up the sample instruction for an indirect sampler the vec4 backend was directly passing the pseudo opcode's src0. 
However vec4_visitor::visit(ir_texture *) doesn't set the texture operation's src0 -- it's left as BAD_FILE, which when translated into a brw_reg gives the null register. In brw_SAMPLE, gen6_resolve_implied_move() inserts a MOV from the inst->base_mrf and sets the src0 appropriately. The indirect sampler case did not have a call to gen6_resolve_implied_move(). The fs backend avoids this because the platforms that support dynamic indexing of samplers (IVB+) have been converted to not use the fake-MRF hack, and instead send from proper GRFs. This patch makes it call gen6_resolve_implied_move before setting up the indirect message. This is similar to what is done for constant sampler numbers in brw_SAMPLE. The Piglit tests for sampler array indexing didn't pick this up because they were using a texture with a solid colour so it didn't matter what texture coordinates were actually used. The tests have now been changed to be more thorough in this commit: http://cgit.freedesktop.org/piglit/commit/?id=4f9caf084eda7 With that patch the tests for gs and vs are currently failing on Ivybridge, but this patch fixes them. There are no other changes to a Piglit run on Ivybridge. On Skylake the gs tests were failing even without the Piglit patch because Skylake needs the source registers to work correctly in order to send a message header to select SIMD4x2 mode. 
(The explanation in the commit message is partially written by Matt Turner) Tested-by: Anuj Phogat Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 67495d2d76e..032b5c28091 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -414,6 +414,9 @@ vec4_generator::generate_tex(vec4_instruction *inst, brw_pop_insn_state(p); + if (inst->base_mrf != -1) + gen6_resolve_implied_move(p, &src, inst->base_mrf); + /* dst = send(offset, a0.0 | ) */ brw_inst *insn = brw_send_indirect_message( p, BRW_SFID_SAMPLER, dst, src, addr); From 6b8accb36b541f77774109ea42533c02bb90bc68 Mon Sep 17 00:00:00 2001 From: Haixia Shi Date: Fri, 12 Jun 2015 10:10:58 -0700 Subject: [PATCH 669/834] egl/dri2: implement platform_surfaceless The surfaceless platform is for off-screen rendering only. Render node support is required. Only consider the render nodes. Do not use normal nodes as they require auth hooks. 
v3: change platform_null to platform_surfaceless v4: make libdrm required for surfaceless v5: remove modified include guards with defined(HAVE_SURFACELESS_PLATFORM) v6: use O_CLOEXEC for drm fd Signed-off-by: Haixia Shi Signed-off-by: Zach Reizner Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- configure.ac | 6 + src/egl/drivers/dri2/Makefile.am | 5 + src/egl/drivers/dri2/egl_dri2.c | 7 + src/egl/drivers/dri2/egl_dri2.h | 3 + src/egl/drivers/dri2/platform_surfaceless.c | 171 ++++++++++++++++++++ src/egl/main/Makefile.am | 4 + src/egl/main/egldisplay.c | 3 +- src/egl/main/egldisplay.h | 1 + 8 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 src/egl/drivers/dri2/platform_surfaceless.c diff --git a/configure.ac b/configure.ac index e6d947e5fc3..3c763c27b9f 100644 --- a/configure.ac +++ b/configure.ac @@ -1777,6 +1777,11 @@ for plat in $egl_platforms; do AC_MSG_ERROR([EGL platform drm requires libdrm >= $LIBDRM_REQUIRED]) ;; + surfaceless) + test "x$have_libdrm" != xyes && + AC_MSG_ERROR([EGL platform surfaceless requires libdrm >= $LIBDRM_REQUIRED]) + ;; + android|gdi|null) ;; @@ -1806,6 +1811,7 @@ fi AM_CONDITIONAL(HAVE_EGL_PLATFORM_X11, echo "$egl_platforms" | grep -q 'x11') AM_CONDITIONAL(HAVE_EGL_PLATFORM_WAYLAND, echo "$egl_platforms" | grep -q 'wayland') AM_CONDITIONAL(HAVE_EGL_PLATFORM_DRM, echo "$egl_platforms" | grep -q 'drm') +AM_CONDITIONAL(HAVE_EGL_PLATFORM_SURFACELESS, echo "$egl_platforms" | grep -q 'surfaceless') AM_CONDITIONAL(HAVE_EGL_PLATFORM_NULL, echo "$egl_platforms" | grep -q 'null') AM_CONDITIONAL(HAVE_EGL_DRIVER_DRI2, test "x$HAVE_EGL_DRIVER_DRI2" != "x") diff --git a/src/egl/drivers/dri2/Makefile.am b/src/egl/drivers/dri2/Makefile.am index f589600be0f..55be4a75ba5 100644 --- a/src/egl/drivers/dri2/Makefile.am +++ b/src/egl/drivers/dri2/Makefile.am @@ -65,4 +65,9 @@ libegl_dri2_la_SOURCES += platform_drm.c AM_CFLAGS += -DHAVE_DRM_PLATFORM endif +if HAVE_EGL_PLATFORM_SURFACELESS +libegl_dri2_la_SOURCES += 
platform_surfaceless.c +AM_CFLAGS += -DHAVE_SURFACELESS_PLATFORM +endif + EXTRA_DIST = SConscript diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index dceb9a011d9..a1cbd437f53 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -667,6 +667,13 @@ dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp) return EGL_FALSE; switch (disp->Platform) { +#ifdef HAVE_SURFACELESS_PLATFORM + case _EGL_PLATFORM_SURFACELESS: + if (disp->Options.TestOnly) + return EGL_TRUE; + return dri2_initialize_surfaceless(drv, disp); +#endif + #ifdef HAVE_X11_PLATFORM case _EGL_PLATFORM_X11: if (disp->Options.TestOnly) diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index adade3db9cc..9985c49f984 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -351,6 +351,9 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp); EGLBoolean dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp); +EGLBoolean +dri2_initialize_surfaceless(_EGLDriver *drv, _EGLDisplay *disp); + void dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw); diff --git a/src/egl/drivers/dri2/platform_surfaceless.c b/src/egl/drivers/dri2/platform_surfaceless.c new file mode 100644 index 00000000000..30cea368554 --- /dev/null +++ b/src/egl/drivers/dri2/platform_surfaceless.c @@ -0,0 +1,171 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (c) 2014 The Chromium OS Authors. 
+ * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "egl_dri2.h" +#include "egl_dri2_fallbacks.h" +#include "loader.h" + +static struct dri2_egl_display_vtbl dri2_surfaceless_display_vtbl = { + .create_pixmap_surface = dri2_fallback_create_pixmap_surface, + .create_image = dri2_create_image_khr, + .swap_interval = dri2_fallback_swap_interval, + .swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage, + .swap_buffers_region = dri2_fallback_swap_buffers_region, + .post_sub_buffer = dri2_fallback_post_sub_buffer, + .copy_buffers = dri2_fallback_copy_buffers, + .query_buffer_age = dri2_fallback_query_buffer_age, + .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image, + .get_sync_values = dri2_fallback_get_sync_values, +}; + +static void +surfaceless_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate) +{ +} + +static __DRIbuffer * +surfaceless_get_buffers_with_format(__DRIdrawable * driDrawable, + int *width, int *height, + unsigned int *attachments, int count, + int *out_count, void *loaderPrivate) +{ + struct dri2_egl_surface *dri2_surf = loaderPrivate; + + dri2_surf->buffer_count = 1; + if (width) + *width = dri2_surf->base.Width; + if (height) + *height = dri2_surf->base.Height; + *out_count = dri2_surf->buffer_count;; + return dri2_surf->buffers; +} + +#define DRM_RENDER_DEV_NAME "%s/renderD%d" + +EGLBoolean +dri2_initialize_surfaceless(_EGLDriver *drv, _EGLDisplay *disp) +{ + struct dri2_egl_display *dri2_dpy; + const char* err; + int i; + int driver_loaded = 0; + + loader_set_logger(_eglLog); + + dri2_dpy = calloc(1, sizeof *dri2_dpy); + if (!dri2_dpy) + return _eglError(EGL_BAD_ALLOC, "eglInitialize"); + + disp->DriverData = (void *) dri2_dpy; + + const int limit = 64; + const int base = 128; + for (i = 0; i < limit; ++i) { + char *card_path; + if (asprintf(&card_path, DRM_RENDER_DEV_NAME, DRM_DIR_NAME, base + i) < 0) + continue; + +#ifdef O_CLOEXEC 
+ dri2_dpy->fd = open(card_path, O_RDWR | O_CLOEXEC); + if (dri2_dpy->fd < 0 && errno == EINVAL) +#endif + { + dri2_dpy->fd = open(card_path, O_RDWR); + if (dri2_dpy->fd >= 0) + fcntl(dri2_dpy->fd, F_SETFD, fcntl(dri2_dpy->fd, F_GETFD) | + FD_CLOEXEC); + } + + free(card_path); + if (dri2_dpy->fd < 0) + continue; + + dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0); + if (dri2_dpy->driver_name) { + if (dri2_load_driver(disp)) { + driver_loaded = 1; + break; + } + free(dri2_dpy->driver_name); + } + close(dri2_dpy->fd); + } + + if (!driver_loaded) { + err = "DRI2: failed to load driver"; + goto cleanup_display; + } + + dri2_dpy->dri2_loader_extension.base.name = __DRI_DRI2_LOADER; + dri2_dpy->dri2_loader_extension.base.version = 3; + dri2_dpy->dri2_loader_extension.getBuffers = NULL; + dri2_dpy->dri2_loader_extension.flushFrontBuffer = + surfaceless_flush_front_buffer; + dri2_dpy->dri2_loader_extension.getBuffersWithFormat = + surfaceless_get_buffers_with_format; + + dri2_dpy->extensions[0] = &dri2_dpy->dri2_loader_extension.base; + dri2_dpy->extensions[1] = &image_lookup_extension.base; + dri2_dpy->extensions[2] = &use_invalidate.base; + dri2_dpy->extensions[3] = NULL; + + if (!dri2_create_screen(disp)) { + err = "DRI2: failed to create screen"; + goto cleanup_driver; + } + + for (i = 0; dri2_dpy->driver_configs[i]; i++) { + dri2_add_config(disp, dri2_dpy->driver_configs[i], + i + 1, EGL_WINDOW_BIT, NULL, NULL); + } + + disp->Extensions.KHR_image_base = EGL_TRUE; + + /* Fill vtbl last to prevent accidentally calling virtual function during + * initialization. 
+ */ + dri2_dpy->vtbl = &dri2_surfaceless_display_vtbl; + + return EGL_TRUE; + +cleanup_driver: + dlclose(dri2_dpy->driver); + free(dri2_dpy->driver_name); + close(dri2_dpy->fd); +cleanup_display: + free(dri2_dpy); + + return _eglError(EGL_NOT_INITIALIZED, err); +} diff --git a/src/egl/main/Makefile.am b/src/egl/main/Makefile.am index b6617366c7e..9030d272b53 100644 --- a/src/egl/main/Makefile.am +++ b/src/egl/main/Makefile.am @@ -68,6 +68,10 @@ if HAVE_EGL_PLATFORM_NULL AM_CFLAGS += -DHAVE_NULL_PLATFORM endif +if HAVE_EGL_PLATFORM_SURFACELESS +AM_CFLAGS += -DHAVE_SURFACELESS_PLATFORM +endif + if HAVE_EGL_DRIVER_DRI2 AM_CFLAGS += -D_EGL_BUILT_IN_DRIVER_DRI2 AM_CFLAGS += -DHAVE_XCB_DRI2 diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c index a3ecba8c41e..24a0c7e61a7 100644 --- a/src/egl/main/egldisplay.c +++ b/src/egl/main/egldisplay.c @@ -71,7 +71,8 @@ static const struct { { _EGL_PLATFORM_DRM, "drm" }, { _EGL_PLATFORM_NULL, "null" }, { _EGL_PLATFORM_ANDROID, "android" }, - { _EGL_PLATFORM_HAIKU, "haiku" } + { _EGL_PLATFORM_HAIKU, "haiku" }, + { _EGL_PLATFORM_SURFACELESS, "surfaceless" }, }; diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 84cfbe19f7e..0b50a36a098 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -51,6 +51,7 @@ enum _egl_platform_type { _EGL_PLATFORM_NULL, _EGL_PLATFORM_ANDROID, _EGL_PLATFORM_HAIKU, + _EGL_PLATFORM_SURFACELESS, _EGL_NUM_PLATFORMS, _EGL_INVALID_PLATFORM = -1 From 06687564479be1a2eed5842cfe4ad85dd099261b Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:48 -0700 Subject: [PATCH 670/834] i965/gen9: Set tiled resource mode in surface state This patch sets the tiled resource mode for texture and renderbuffer surfaces. 
Signed-off-by: Anuj Phogat Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_defines.h | 6 ++++++ .../drivers/dri/i965/gen8_surface_state.c | 21 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 2a8fc0beea4..54e5a8f78bc 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -592,6 +592,12 @@ #define GEN7_SURFACE_MOCS_SHIFT 16 #define GEN7_SURFACE_MOCS_MASK INTEL_MASK(19, 16) +#define GEN9_SURFACE_TRMODE_SHIFT 18 +#define GEN9_SURFACE_TRMODE_MASK INTEL_MASK(19, 18) +#define GEN9_SURFACE_TRMODE_NONE 0 +#define GEN9_SURFACE_TRMODE_TILEYF 1 +#define GEN9_SURFACE_TRMODE_TILEYS 2 + /* Surface state DW6 */ #define GEN7_SURFACE_MCS_ENABLE (1 << 0) #define GEN7_SURFACE_MCS_PITCH_SHIFT 3 diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 6b2463a2c63..7caa585e8be 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -56,6 +56,19 @@ swizzle_to_scs(unsigned swizzle) return (swizzle + 4) & 7; } +static uint32_t +surface_tiling_resource_mode(uint32_t tr_mode) +{ + switch (tr_mode) { + case INTEL_MIPTREE_TRMODE_YF: + return GEN9_SURFACE_TRMODE_TILEYF; + case INTEL_MIPTREE_TRMODE_YS: + return GEN9_SURFACE_TRMODE_TILEYS; + default: + return GEN9_SURFACE_TRMODE_NONE; + } +} + static uint32_t surface_tiling_mode(uint32_t tiling) { @@ -166,6 +179,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw, uint32_t mocs_wb = brw->gen >= 9 ? 
SKL_MOCS_WB : BDW_MOCS_WB; int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; unsigned tiling_mode, pitch; + const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode); if (mt->format == MESA_FORMAT_S_UINT8) { tiling_mode = GEN8_SURFACE_TILING_W; @@ -221,6 +235,9 @@ gen8_emit_texture_surface_state(struct brw_context *brw, surf[5] = SET_FIELD(min_level - mt->first_level, GEN7_SURFACE_MIN_LOD) | (max_level - min_level - 1); /* mip count */ + if (brw->gen >= 9) + surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE); + if (aux_mt) { surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) | SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) | @@ -351,6 +368,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, unsigned height = mt->logical_height0; unsigned pitch = mt->pitch; uint32_t tiling = mt->tiling; + unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode); uint32_t format = 0; uint32_t surf_type; uint32_t offset; @@ -440,6 +458,9 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, surf[5] = irb->mt_level - irb->mt->first_level; + if (brw->gen >= 9) + surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE); + if (aux_mt) { surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) | SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) | From af0853033296a4db3c48352e8cb60f8209424f79 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:48 -0700 Subject: [PATCH 671/834] i965: Rename use_linear_1d_layout() and make it global This function will be utilised in later patches. 
V2: Make both pointers constants (Topi) Signed-off-by: Anuj Phogat Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_context.h | 4 ++++ src/mesa/drivers/dri/i965/brw_tex_layout.c | 10 +++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 01c4283c8f0..58119ee5c5e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -2003,6 +2003,10 @@ gen6_upload_push_constants(struct brw_context *brw, struct brw_stage_state *stage_state, enum aub_state_struct_type type); +bool +gen9_use_linear_1d_layout(const struct brw_context *brw, + const struct intel_mipmap_tree *mt); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 1e7d8a103db..998d8c42770 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -500,9 +500,9 @@ align_cube(struct intel_mipmap_tree *mt) mt->total_height += 2; } -static bool -use_linear_1d_layout(struct brw_context *brw, - struct intel_mipmap_tree *mt) +bool +gen9_use_linear_1d_layout(const struct brw_context *brw, + const struct intel_mipmap_tree *mt) { /* On Gen9+ the mipmap levels of a 1D surface are all laid out in a * horizontal line. 
This isn't done for depth/stencil buffers however @@ -527,7 +527,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw, struct intel_mipmap_tree *mt) { unsigned height = mt->physical_height0; - bool layout_1d = use_linear_1d_layout(brw, mt); + bool layout_1d = gen9_use_linear_1d_layout(brw, mt); int physical_qpitch; if (layout_1d) @@ -749,7 +749,7 @@ intel_miptree_set_total_width_height(struct brw_context *brw, break; case INTEL_MSAA_LAYOUT_NONE: case INTEL_MSAA_LAYOUT_IMS: - if (use_linear_1d_layout(brw, mt)) + if (gen9_use_linear_1d_layout(brw, mt)) gen9_miptree_layout_1d(mt); else brw_miptree_layout_2d(mt); From 6c380d42b161da977d164ccf75ccc25a2e056bb1 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:48 -0700 Subject: [PATCH 672/834] i965: Use BRW_SURFACE_* in place of GL_TEXTURE_* Makes no functional changes in the code. Signed-off-by: Anuj Phogat Reviewed-by: Chris Forbes Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/gen8_surface_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 7caa585e8be..96310742bf1 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -205,16 +205,16 @@ gen8_emit_texture_surface_state(struct brw_context *brw, assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16); } + const uint32_t surf_type = translate_tex_target(target); uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index); - surf[0] = translate_tex_target(target) << BRW_SURFACE_TYPE_SHIFT | + surf[0] = SET_FIELD(surf_type, BRW_SURFACE_TYPE) | format << BRW_SURFACE_FORMAT_SHIFT | vertical_alignment(mt) | horizontal_alignment(mt) | tiling_mode; - if (target == GL_TEXTURE_CUBE_MAP || - target == GL_TEXTURE_CUBE_MAP_ARRAY) { + if (surf_type == BRW_SURFACE_CUBE) { surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES; } From 
54591bb67f189820ef0d61b040179abbd5ecf78a Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:48 -0700 Subject: [PATCH 673/834] i965/gen9: Set vertical and horizontal surface alignments Patch sets the alignments for texture and renderbuffer surfaces. V3: Make changes inside horizontal_alignment() and vertical_alignment() (Topi) Signed-off-by: Anuj Phogat Reviewed-by: Topi Pohjolainen --- .../drivers/dri/i965/gen8_surface_state.c | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 96310742bf1..2514d960e2e 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -83,8 +83,18 @@ surface_tiling_mode(uint32_t tiling) } static unsigned -vertical_alignment(const struct intel_mipmap_tree *mt) +vertical_alignment(const struct brw_context *brw, + const struct intel_mipmap_tree *mt, + uint32_t surf_type) { + /* On Gen9+ vertical alignment is ignored for 1D surfaces and when + * tr_mode is not TRMODE_NONE. + */ + if (brw->gen > 8 && + (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE || + surf_type == BRW_SURFACE_1D)) + return 0; + switch (mt->align_h) { case 4: return GEN8_SURFACE_VALIGN_4; @@ -98,8 +108,18 @@ vertical_alignment(const struct intel_mipmap_tree *mt) } static unsigned -horizontal_alignment(const struct intel_mipmap_tree *mt) +horizontal_alignment(const struct brw_context *brw, + const struct intel_mipmap_tree *mt, + uint32_t surf_type) { + /* On Gen9+ horizontal alignment is ignored when tr_mode is not + * TRMODE_NONE. 
+ */ + if (brw->gen > 8 && + (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE || + gen9_use_linear_1d_layout(brw, mt))) + return 0; + switch (mt->align_w) { case 4: return GEN8_SURFACE_HALIGN_4; @@ -210,8 +230,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw, surf[0] = SET_FIELD(surf_type, BRW_SURFACE_TYPE) | format << BRW_SURFACE_FORMAT_SHIFT | - vertical_alignment(mt) | - horizontal_alignment(mt) | + vertical_alignment(brw, mt, surf_type) | + horizontal_alignment(brw, mt, surf_type) | tiling_mode; if (surf_type == BRW_SURFACE_CUBE) { @@ -438,8 +458,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) | (is_array ? GEN7_SURFACE_IS_ARRAY : 0) | (format << BRW_SURFACE_FORMAT_SHIFT) | - vertical_alignment(mt) | - horizontal_alignment(mt) | + vertical_alignment(brw, mt, surf_type) | + horizontal_alignment(brw, mt, surf_type) | surface_tiling_mode(tiling); surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; From e20345204d8fe8864240be2428ac0f225b92b7cc Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Tue, 14 Apr 2015 22:06:50 -0700 Subject: [PATCH 674/834] i965/gen9: Disable Mip Tail for YF/YS tiled surfaces Disabling miptails fixed the buffer corruption happening in FBOs which use a YF/YS tiled renderbuffer or texture as a color attachment. The spec recommends disabling mip tails only for non-mip-mapped surfaces. But without disabling miptails I couldn't get correct data out of a mipmapped YF/YS tiled surface. We need a better understanding of miptails before we start using them. For now this patch helps move things forward. 
Signed-off-by: Anuj Phogat Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_defines.h | 3 +++ src/mesa/drivers/dri/i965/gen8_surface_state.c | 10 ++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 54e5a8f78bc..bfcc4424c3b 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -598,6 +598,9 @@ #define GEN9_SURFACE_TRMODE_TILEYF 1 #define GEN9_SURFACE_TRMODE_TILEYS 2 +#define GEN9_SURFACE_MIP_TAIL_START_LOD_SHIFT 8 +#define GEN9_SURFACE_MIP_TAIL_START_LOD_MASK INTEL_MASK(11, 8) + /* Surface state DW6 */ #define GEN7_SURFACE_MCS_ENABLE (1 << 0) #define GEN7_SURFACE_MCS_PITCH_SHIFT 3 diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 2514d960e2e..b2d1a579815 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -255,8 +255,11 @@ gen8_emit_texture_surface_state(struct brw_context *brw, surf[5] = SET_FIELD(min_level - mt->first_level, GEN7_SURFACE_MIN_LOD) | (max_level - min_level - 1); /* mip count */ - if (brw->gen >= 9) + if (brw->gen >= 9) { surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE); + /* Disable Mip Tail by setting a large value. */ + surf[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD); + } if (aux_mt) { surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) | @@ -478,8 +481,11 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, surf[5] = irb->mt_level - irb->mt->first_level; - if (brw->gen >= 9) + if (brw->gen >= 9) { surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE); + /* Disable Mip Tail by setting a large value. 
*/ + surf[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD); + } if (aux_mt) { surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) | From 5fbbec9aae8185b96aa4cf6d778901dea44fefa4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Jun 2015 12:47:56 -0700 Subject: [PATCH 675/834] vc4: Move vc4_packet.h to the kernel/ directory, since it's also shared. I want to notice discrepancies when I diff -u between Mesa and the kernel. --- src/gallium/drivers/vc4/Makefile.sources | 2 +- src/gallium/drivers/vc4/{ => kernel}/vc4_packet.h | 0 src/gallium/drivers/vc4/vc4_cl.h | 2 +- src/gallium/drivers/vc4/vc4_resource.h | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename src/gallium/drivers/vc4/{ => kernel}/vc4_packet.h (100%) diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index f678b2fc0d3..edef49353a2 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -1,6 +1,7 @@ C_SOURCES := \ kernel/vc4_drv.h \ kernel/vc4_gem.c \ + kernel/vc4_packet.h \ kernel/vc4_validate.c \ kernel/vc4_validate_shaders.c \ vc4_blit.c \ @@ -24,7 +25,6 @@ C_SOURCES := \ vc4_opt_dead_code.c \ vc4_opt_small_immediates.c \ vc4_opt_vpm_writes.c \ - vc4_packet.h \ vc4_program.c \ vc4_qir.c \ vc4_qir_lower_uniforms.c \ diff --git a/src/gallium/drivers/vc4/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h similarity index 100% rename from src/gallium/drivers/vc4/vc4_packet.h rename to src/gallium/drivers/vc4/kernel/vc4_packet.h diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h index 32a2e717379..4a50e790942 100644 --- a/src/gallium/drivers/vc4/vc4_cl.h +++ b/src/gallium/drivers/vc4/vc4_cl.h @@ -29,7 +29,7 @@ #include "util/u_math.h" #include "util/macros.h" -#include "vc4_packet.h" +#include "kernel/vc4_packet.h" struct vc4_bo; diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h index 
b3cba8f2f65..a81c4704d54 100644 --- a/src/gallium/drivers/vc4/vc4_resource.h +++ b/src/gallium/drivers/vc4/vc4_resource.h @@ -26,7 +26,7 @@ #define VC4_RESOURCE_H #include "vc4_screen.h" -#include "vc4_packet.h" +#include "kernel/vc4_packet.h" #include "util/u_transfer.h" struct vc4_transfer { From c2f82876014c9acb0518cf31a6f675fcc73c955a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Jun 2015 12:58:47 -0700 Subject: [PATCH 676/834] vc4: Use symbolic values in texture ptype validation. --- src/gallium/drivers/vc4/kernel/vc4_validate.c | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 2b57ca0b4b0..aba5b51e54d 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -814,10 +814,10 @@ reloc_tex(struct vc4_exec_info *exec, uint32_t p3 = (sample->p_offset[3] != ~0 ? *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0); uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0]; - uint32_t offset = p0 & ~0xfff; - uint32_t miplevels = (p0 & 15); - uint32_t width = (p1 >> 8) & 2047; - uint32_t height = (p1 >> 20) & 2047; + uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK; + uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS); + uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH); + uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT); uint32_t cpp, tiling_format, utile_w, utile_h; uint32_t i; uint32_t cube_map_stride = 0; @@ -845,16 +845,18 @@ reloc_tex(struct vc4_exec_info *exec, if (height == 0) height = 2048; - if (p0 & (1 << 9)) { - if ((p2 & (3 << 30)) == (1 << 30)) - cube_map_stride = p2 & 0x3ffff000; - if ((p3 & (3 << 30)) == (1 << 30)) { + if (p0 & VC4_TEX_P0_CMMODE_MASK) { + if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) == + VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) + cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK; + if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) == + 
VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) { if (cube_map_stride) { DRM_ERROR("Cube map stride set twice\n"); return false; } - cube_map_stride = p3 & 0x3ffff000; + cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK; } if (!cube_map_stride) { DRM_ERROR("Cube map stride not set\n"); @@ -862,7 +864,8 @@ reloc_tex(struct vc4_exec_info *exec, } } - type = ((p0 >> 4) & 15) | ((p1 >> 31) << 4); + type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) | + (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4)); switch (type) { case VC4_TEXTURE_TYPE_RGBA8888: From e22a1927844cdda499ea15f539028c16e47394ea Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Jun 2015 13:20:25 -0700 Subject: [PATCH 677/834] vc4: Make symbolic values for packet sizes. --- src/gallium/drivers/vc4/kernel/vc4_packet.h | 32 +++++++++ src/gallium/drivers/vc4/kernel/vc4_validate.c | 67 ++++++++++--------- src/gallium/drivers/vc4/vc4_blit.c | 12 ++-- src/gallium/drivers/vc4/vc4_context.c | 28 ++++---- 4 files changed, 84 insertions(+), 55 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h index 181f2e01dc9..af0997f55df 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_packet.h +++ b/src/gallium/drivers/vc4/kernel/vc4_packet.h @@ -81,6 +81,38 @@ enum vc4_packet { VC4_PACKET_GEM_HANDLES = 254, } __attribute__ ((__packed__)); +#define VC4_PACKET_HALT_SIZE 1 +#define VC4_PACKET_NOP_SIZE 1 +#define VC4_PACKET_FLUSH_SIZE 1 +#define VC4_PACKET_FLUSH_ALL_SIZE 1 +#define VC4_PACKET_START_TILE_BINNING_SIZE 1 +#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1 +#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1 +#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5 +#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1 +#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1 +#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7 +#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7 +#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14 +#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10 +#define 
VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2 +#define VC4_PACKET_GL_SHADER_STATE_SIZE 5 +#define VC4_PACKET_NV_SHADER_STATE_SIZE 5 +#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4 +#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5 +#define VC4_PACKET_POINT_SIZE_SIZE 5 +#define VC4_PACKET_LINE_WIDTH_SIZE 5 +#define VC4_PACKET_RHT_X_BOUNDARY_SIZE 3 +#define VC4_PACKET_DEPTH_OFFSET_SIZE 5 +#define VC4_PACKET_CLIP_WINDOW_SIZE 9 +#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5 +#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9 +#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9 +#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16 +#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE 11 +#define VC4_PACKET_CLEAR_COLORS_SIZE 14 +#define VC4_PACKET_TILE_COORDINATES_SIZE 3 +#define VC4_PACKET_GEM_HANDLES_SIZE 9 #define VC4_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low)) /* Using the GNU statement expression extension */ diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index aba5b51e54d..a8392705abb 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -633,6 +633,9 @@ validate_gem_handles(VALIDATE_ARGS) return 0; } +#define VC4_DEFINE_PACKET(packet, bin, render, name, func) \ + [packet] = { bin, render, packet ## _SIZE, name, func } + static const struct cmd_info { bool bin; bool render; @@ -641,59 +644,59 @@ static const struct cmd_info { int (*func)(struct vc4_exec_info *exec, void *validated, void *untrusted); } cmd_info[] = { - [VC4_PACKET_HALT] = { 1, 1, 1, "halt", NULL }, - [VC4_PACKET_NOP] = { 1, 1, 1, "nop", NULL }, - [VC4_PACKET_FLUSH] = { 1, 1, 1, "flush", NULL }, - [VC4_PACKET_FLUSH_ALL] = { 1, 0, 1, "flush all state", validate_flush_all }, - [VC4_PACKET_START_TILE_BINNING] = { 1, 0, 1, "start tile binning", validate_start_tile_binning }, - [VC4_PACKET_INCREMENT_SEMAPHORE] = { 1, 0, 1, "increment semaphore", validate_increment_semaphore }, - 
[VC4_PACKET_WAIT_ON_SEMAPHORE] = { 0, 1, 1, "wait on semaphore", validate_wait_on_semaphore }, + VC4_DEFINE_PACKET(VC4_PACKET_HALT, 1, 1, "halt", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_NOP, 1, 1, "nop", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, 1, 1, "flush", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, 1, 0, "flush all state", validate_flush_all), + VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, 1, 0, "start tile binning", validate_start_tile_binning), + VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 1, 0, "increment semaphore", validate_increment_semaphore), + VC4_DEFINE_PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 0, 1, "wait on semaphore", validate_wait_on_semaphore), /* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but * we only use it from the render CL in order to jump into the tile * allocation BO. */ - [VC4_PACKET_BRANCH_TO_SUB_LIST] = { 0, 1, 5, "branch to sublist", validate_branch_to_sublist }, - [VC4_PACKET_STORE_MS_TILE_BUFFER] = { 0, 1, 1, "store MS resolved tile color buffer", NULL }, - [VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF] = { 0, 1, 1, "store MS resolved tile color buffer and EOF", NULL }, + VC4_DEFINE_PACKET(VC4_PACKET_BRANCH_TO_SUB_LIST, 0, 1, "branch to sublist", validate_branch_to_sublist), + VC4_DEFINE_PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 0, 1, "store MS resolved tile color buffer", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 0, 1, "store MS resolved tile color buffer and EOF", NULL), - [VC4_PACKET_STORE_TILE_BUFFER_GENERAL] = { 0, 1, 7, "Store Tile Buffer General", validate_loadstore_tile_buffer_general }, - [VC4_PACKET_LOAD_TILE_BUFFER_GENERAL] = { 0, 1, 7, "Load Tile Buffer General", validate_loadstore_tile_buffer_general }, + VC4_DEFINE_PACKET(VC4_PACKET_STORE_TILE_BUFFER_GENERAL, 0, 1, "Store Tile Buffer General", validate_loadstore_tile_buffer_general), + VC4_DEFINE_PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL, 0, 1, "Load Tile Buffer General", validate_loadstore_tile_buffer_general), - 
[VC4_PACKET_GL_INDEXED_PRIMITIVE] = { 1, 1, 14, "Indexed Primitive List", validate_indexed_prim_list }, + VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 1, 1, "Indexed Primitive List", validate_indexed_prim_list), - [VC4_PACKET_GL_ARRAY_PRIMITIVE] = { 1, 1, 10, "Vertex Array Primitives", validate_gl_array_primitive }, + VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 1, 1, "Vertex Array Primitives", validate_gl_array_primitive), /* This is only used by clipped primitives (packets 48 and 49), which * we don't support parsing yet. */ - [VC4_PACKET_PRIMITIVE_LIST_FORMAT] = { 1, 1, 2, "primitive list format", NULL }, + VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, 1, 1, "primitive list format", NULL), - [VC4_PACKET_GL_SHADER_STATE] = { 1, 1, 5, "GL Shader State", validate_gl_shader_state }, - [VC4_PACKET_NV_SHADER_STATE] = { 1, 1, 5, "NV Shader State", validate_nv_shader_state }, + VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, 1, 1, "GL Shader State", validate_gl_shader_state), + VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, 1, 1, "NV Shader State", validate_nv_shader_state), - [VC4_PACKET_CONFIGURATION_BITS] = { 1, 1, 4, "configuration bits", NULL }, - [VC4_PACKET_FLAT_SHADE_FLAGS] = { 1, 1, 5, "flat shade flags", NULL }, - [VC4_PACKET_POINT_SIZE] = { 1, 1, 5, "point size", NULL }, - [VC4_PACKET_LINE_WIDTH] = { 1, 1, 5, "line width", NULL }, - [VC4_PACKET_RHT_X_BOUNDARY] = { 1, 1, 3, "RHT X boundary", NULL }, - [VC4_PACKET_DEPTH_OFFSET] = { 1, 1, 5, "Depth Offset", NULL }, - [VC4_PACKET_CLIP_WINDOW] = { 1, 1, 9, "Clip Window", NULL }, - [VC4_PACKET_VIEWPORT_OFFSET] = { 1, 1, 5, "Viewport Offset", NULL }, - [VC4_PACKET_CLIPPER_XY_SCALING] = { 1, 1, 9, "Clipper XY Scaling", NULL }, + VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, 1, 1, "configuration bits", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, 1, 1, "flat shade flags", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, 1, 1, "point size", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, 
1, 1, "line width", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, 1, 1, "RHT X boundary", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, 1, 1, "Depth Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, 1, 1, "Clip Window", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, 1, 1, "Viewport Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, 1, 1, "Clipper XY Scaling", NULL), /* Note: The docs say this was also 105, but it was 106 in the * initial userland code drop. */ - [VC4_PACKET_CLIPPER_Z_SCALING] = { 1, 1, 9, "Clipper Z Scale and Offset", NULL }, + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, 1, 1, "Clipper Z Scale and Offset", NULL), - [VC4_PACKET_TILE_BINNING_MODE_CONFIG] = { 1, 0, 16, "tile binning configuration", validate_tile_binning_config }, + VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 1, 0, "tile binning configuration", validate_tile_binning_config), - [VC4_PACKET_TILE_RENDERING_MODE_CONFIG] = { 0, 1, 11, "tile rendering mode configuration", validate_tile_rendering_mode_config}, + VC4_DEFINE_PACKET(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 0, 1, "tile rendering mode configuration", validate_tile_rendering_mode_config), - [VC4_PACKET_CLEAR_COLORS] = { 0, 1, 14, "Clear Colors", NULL }, + VC4_DEFINE_PACKET(VC4_PACKET_CLEAR_COLORS, 0, 1, "Clear Colors", NULL), - [VC4_PACKET_TILE_COORDINATES] = { 0, 1, 3, "Tile Coordinates", validate_tile_coordinates }, + VC4_DEFINE_PACKET(VC4_PACKET_TILE_COORDINATES, 0, 1, "Tile Coordinates", validate_tile_coordinates), - [VC4_PACKET_GEM_HANDLES] = { 1, 1, 9, "GEM handles", validate_gem_handles }, + VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, 1, 1, "GEM handles", validate_gem_handles), }; int diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c index 2d524c40b4d..58066501428 100644 --- a/src/gallium/drivers/vc4/vc4_blit.c +++ b/src/gallium/drivers/vc4/vc4_blit.c @@ -40,14 +40,12 @@ vc4_tile_blit_color_rcl(struct vc4_context *vc4, uint32_t 
max_y_tile = (dst_surf->base.height - 1) / 64; uint32_t xtiles = max_x_tile - min_x_tile + 1; uint32_t ytiles = max_y_tile - min_y_tile + 1; - uint32_t reloc_size = 9; - uint32_t config_size = 11 + reloc_size; - uint32_t loadstore_size = 7 + reloc_size; - uint32_t tilecoords_size = 3; cl_ensure_space(&vc4->rcl, - config_size + - xtiles * ytiles * (loadstore_size * 2 + - tilecoords_size * 1)); + (VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE + + VC4_PACKET_GEM_HANDLES_SIZE) + + xtiles * ytiles * ((VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE + + VC4_PACKET_GEM_HANDLES_SIZE) * 2 + + VC4_PACKET_TILE_COORDINATES_SIZE)); cl_ensure_space(&vc4->bo_handles, 2 * sizeof(uint32_t)); cl_ensure_space(&vc4->bo_pointers, 2 * sizeof(struct vc4_bo *)); diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index a2b1cac952d..a6231d04aa8 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -112,23 +112,19 @@ vc4_setup_rcl(struct vc4_context *vc4) resolve_uncleared); #endif - uint32_t reloc_size = 9; - uint32_t clear_size = 14; - uint32_t config_size = 11 + reloc_size; - uint32_t loadstore_size = 7 + reloc_size; - uint32_t tilecoords_size = 3; - uint32_t branch_size = 5 + reloc_size; - uint32_t color_store_size = 1; - uint32_t semaphore_size = 1; cl_ensure_space(&vc4->rcl, - clear_size + - config_size + - loadstore_size + - semaphore_size + - xtiles * ytiles * (loadstore_size * 4 + - tilecoords_size * 3 + - branch_size + - color_store_size)); + VC4_PACKET_CLEAR_COLORS_SIZE + + (VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE + + VC4_PACKET_GEM_HANDLES_SIZE) + + (VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE + + VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE) + + VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE + + xtiles * ytiles * ((VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE + + VC4_PACKET_GEM_HANDLES_SIZE) * 4 + + VC4_PACKET_TILE_COORDINATES_SIZE * 3 + + (VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE + + VC4_PACKET_GEM_HANDLES_SIZE) + + 
VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE)); if (vc4->cleared) { cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS); From 731ac05cc4e444175288032a76a29c95059af038 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 11 Jun 2015 16:08:11 -0700 Subject: [PATCH 678/834] vc4: Use VC4_SET/GET_FIELD for some RCL packets. --- src/gallium/drivers/vc4/kernel/vc4_packet.h | 51 +++++++++------- src/gallium/drivers/vc4/kernel/vc4_validate.c | 21 +++---- src/gallium/drivers/vc4/vc4_blit.c | 29 +++++---- src/gallium/drivers/vc4/vc4_context.c | 61 ++++++++++--------- 4 files changed, 87 insertions(+), 75 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h index af0997f55df..764a125c6e8 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_packet.h +++ b/src/gallium/drivers/vc4/kernel/vc4_packet.h @@ -149,18 +149,19 @@ enum vc4_packet { /** @{ * - * byte 1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and + * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL */ -#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 7) -#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 6) -#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 5) -#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 4) +#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 15) +#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 14) +#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 13) +#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 12) -#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 (0 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER (1 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_BGR565 (2 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_MASK (3 << 0) +#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8) +#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8 +#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 0 +#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER 1 +#define VC4_LOADSTORE_TILE_BUFFER_BGR565 2 /** @} */ /** @{ @@ -168,21 
+169,24 @@ enum vc4_packet { * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL */ +#define VC4_STORE_TILE_BUFFER_MODE_MASK VC4_MASK(7, 6) +#define VC4_STORE_TILE_BUFFER_MODE_SHIFT 6 #define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6) #define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6) #define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6) /** The values of the field are VC4_TILING_FORMAT_* */ -#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK (3 << 4) -#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 4 +#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK VC4_MASK(5, 4) +#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT 4 - -#define VC4_LOADSTORE_TILE_BUFFER_NONE (0 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_COLOR (1 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_ZS (2 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_Z (3 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK (4 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_FULL (5 << 0) +#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK VC4_MASK(2, 0) +#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT 0 +#define VC4_LOADSTORE_TILE_BUFFER_NONE 0 +#define VC4_LOADSTORE_TILE_BUFFER_COLOR 1 +#define VC4_LOADSTORE_TILE_BUFFER_ZS 2 +#define VC4_LOADSTORE_TILE_BUFFER_Z 3 +#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK 4 +#define VC4_LOADSTORE_TILE_BUFFER_FULL 5 /** @} */ #define VC4_INDEX_BUFFER_U8 (0 << 4) @@ -251,17 +255,18 @@ enum vc4_packet { #define VC4_RENDER_CONFIG_ENABLE_VG_MASK (1 << 8) /** The values of the field are VC4_TILING_FORMAT_* */ -#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK (3 << 6) +#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6) #define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT 6 #define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4) #define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4) #define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4) -#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED (0 << 2) -#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 (1 << 2) -#define VC4_RENDER_CONFIG_FORMAT_BGR565 (2 << 2) 
-#define VC4_RENDER_CONFIG_FORMAT_MASK (3 << 2) +#define VC4_RENDER_CONFIG_FORMAT_MASK VC4_MASK(3, 2) +#define VC4_RENDER_CONFIG_FORMAT_SHIFT 2 +#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 0 +#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1 +#define VC4_RENDER_CONFIG_FORMAT_BGR565 2 #define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT (1 << 1) #define VC4_RENDER_CONFIG_MS_MODE_4X (1 << 0) diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index a8392705abb..0a74a2c6db7 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -311,17 +311,18 @@ validate_branch_to_sublist(VALIDATE_ARGS) static int validate_loadstore_tile_buffer_general(VALIDATE_ARGS) { - uint32_t packet_b0 = *(uint8_t *)(untrusted + 0); - uint32_t packet_b1 = *(uint8_t *)(untrusted + 1); + uint16_t packet_b01 = *(uint16_t *)(untrusted + 0); struct drm_gem_cma_object *fbo; - uint32_t buffer_type = packet_b0 & 0xf; + uint32_t buffer_type = VC4_GET_FIELD(packet_b01, + VC4_LOADSTORE_TILE_BUFFER_BUFFER); uint32_t untrusted_address, offset, cpp; switch (buffer_type) { case VC4_LOADSTORE_TILE_BUFFER_NONE: return 0; case VC4_LOADSTORE_TILE_BUFFER_COLOR: - if ((packet_b1 & VC4_LOADSTORE_TILE_BUFFER_MASK) == + if (VC4_GET_FIELD(packet_b01, + VC4_LOADSTORE_TILE_BUFFER_FORMAT) == VC4_LOADSTORE_TILE_BUFFER_RGBA8888) { cpp = 4; } else { @@ -346,9 +347,8 @@ validate_loadstore_tile_buffer_general(VALIDATE_ARGS) offset = untrusted_address & ~0xf; if (!check_tex_size(exec, fbo, offset, - ((packet_b0 & - VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK) >> - VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT), + VC4_GET_FIELD(packet_b01, + VC4_LOADSTORE_TILE_BUFFER_TILING), exec->fb_width, exec->fb_height, cpp)) { return -EINVAL; } @@ -590,7 +590,7 @@ validate_tile_rendering_mode_config(VALIDATE_ARGS) exec->fb_height = *(uint16_t *)(untrusted + 6); flags = *(uint16_t *)(untrusted + 8); - if ((flags & VC4_RENDER_CONFIG_FORMAT_MASK) == + if 
(VC4_GET_FIELD(flags, VC4_RENDER_CONFIG_FORMAT) == VC4_RENDER_CONFIG_FORMAT_RGBA8888) { cpp = 4; } else { @@ -599,9 +599,8 @@ validate_tile_rendering_mode_config(VALIDATE_ARGS) offset = *(uint32_t *)untrusted; if (!check_tex_size(exec, fbo, offset, - ((flags & - VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK) >> - VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT), + VC4_GET_FIELD(flags, + VC4_RENDER_CONFIG_MEMORY_FORMAT), exec->fb_width, exec->fb_height, cpp)) { return -EINVAL; } diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c index 58066501428..b3811025cc1 100644 --- a/src/gallium/drivers/vc4/vc4_blit.c +++ b/src/gallium/drivers/vc4/vc4_blit.c @@ -54,11 +54,13 @@ vc4_tile_blit_color_rcl(struct vc4_context *vc4, cl_reloc(vc4, &vc4->rcl, dst->bo, dst_surf->offset); cl_u16(&vc4->rcl, dst_surf->base.width); cl_u16(&vc4->rcl, dst_surf->base.height); - cl_u16(&vc4->rcl, ((dst_surf->tiling << - VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) | - (vc4_rt_format_is_565(dst_surf->base.format) ? - VC4_RENDER_CONFIG_FORMAT_BGR565 : - VC4_RENDER_CONFIG_FORMAT_RGBA8888))); + cl_u16(&vc4->rcl, + VC4_SET_FIELD(dst_surf->tiling, + VC4_RENDER_CONFIG_MEMORY_FORMAT) | + VC4_SET_FIELD(vc4_rt_format_is_565(dst_surf->base.format) ? + VC4_RENDER_CONFIG_FORMAT_BGR565 : + VC4_RENDER_CONFIG_FORMAT_RGBA8888, + VC4_RENDER_CONFIG_FORMAT)); uint32_t src_hindex = vc4_gem_hindex(vc4, src->bo); @@ -69,14 +71,15 @@ vc4_tile_blit_color_rcl(struct vc4_context *vc4, cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u8(&vc4->rcl, - VC4_LOADSTORE_TILE_BUFFER_COLOR | - (src_surf->tiling << - VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT)); - cl_u8(&vc4->rcl, - vc4_rt_format_is_565(src_surf->base.format) ? 
- VC4_LOADSTORE_TILE_BUFFER_BGR565 : - VC4_LOADSTORE_TILE_BUFFER_RGBA8888); + cl_u16(&vc4->rcl, + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + VC4_SET_FIELD(src_surf->tiling, + VC4_LOADSTORE_TILE_BUFFER_TILING) | + VC4_SET_FIELD(vc4_rt_format_is_565(src_surf->base.format) ? + VC4_LOADSTORE_TILE_BUFFER_BGR565 : + VC4_LOADSTORE_TILE_BUFFER_RGBA8888, + VC4_LOADSTORE_TILE_BUFFER_FORMAT)); cl_reloc_hindex(&vc4->rcl, src_hindex, src_surf->offset); diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index a6231d04aa8..10b58b0d815 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -50,10 +50,12 @@ vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted) return; cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u8(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE); - cl_u8(&vc4->rcl, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | - VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | - VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR)); + cl_u16(&vc4->rcl, + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR); cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */ *coords_emitted = false; @@ -148,11 +150,13 @@ vc4_setup_rcl(struct vc4_context *vc4) cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset); cl_u16(&vc4->rcl, width); cl_u16(&vc4->rcl, height); - cl_u16(&vc4->rcl, ((render_surf->tiling << - VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) | - (vc4_rt_format_is_565(render_surf->base.format) ? - VC4_RENDER_CONFIG_FORMAT_BGR565 : - VC4_RENDER_CONFIG_FORMAT_RGBA8888))); + cl_u16(&vc4->rcl, + VC4_SET_FIELD(render_surf->tiling, + VC4_RENDER_CONFIG_MEMORY_FORMAT) | + VC4_SET_FIELD((vc4_rt_format_is_565(render_surf->base.format) ? 
+ VC4_RENDER_CONFIG_FORMAT_BGR565 : + VC4_RENDER_CONFIG_FORMAT_RGBA8888), + VC4_RENDER_CONFIG_FORMAT)); /* The tile buffer normally gets cleared when the previous tile is * stored. If the clear values changed between frames, then the tile @@ -193,14 +197,15 @@ vc4_setup_rcl(struct vc4_context *vc4) cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u8(&vc4->rcl, - VC4_LOADSTORE_TILE_BUFFER_COLOR | - (csurf->tiling << - VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT)); - cl_u8(&vc4->rcl, - vc4_rt_format_is_565(csurf->base.format) ? - VC4_LOADSTORE_TILE_BUFFER_BGR565 : - VC4_LOADSTORE_TILE_BUFFER_RGBA8888); + cl_u16(&vc4->rcl, + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + VC4_SET_FIELD(csurf->tiling, + VC4_LOADSTORE_TILE_BUFFER_TILING) | + VC4_SET_FIELD(vc4_rt_format_is_565(csurf->base.format) ? + VC4_LOADSTORE_TILE_BUFFER_BGR565 : + VC4_LOADSTORE_TILE_BUFFER_RGBA8888, + VC4_LOADSTORE_TILE_BUFFER_FORMAT)); cl_reloc_hindex(&vc4->rcl, color_hindex, csurf->offset); @@ -212,11 +217,11 @@ vc4_setup_rcl(struct vc4_context *vc4) cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u8(&vc4->rcl, - VC4_LOADSTORE_TILE_BUFFER_ZS | - (zsurf->tiling << - VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT)); - cl_u8(&vc4->rcl, 0); + cl_u16(&vc4->rcl, + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + VC4_SET_FIELD(zsurf->tiling, + VC4_LOADSTORE_TILE_BUFFER_TILING)); cl_reloc_hindex(&vc4->rcl, depth_hindex, zsurf->offset); @@ -245,12 +250,12 @@ vc4_setup_rcl(struct vc4_context *vc4) cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u8(&vc4->rcl, - VC4_LOADSTORE_TILE_BUFFER_ZS | - (zsurf->tiling << - VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT)); - cl_u8(&vc4->rcl, - VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR); + cl_u16(&vc4->rcl, + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + 
VC4_SET_FIELD(zsurf->tiling, + VC4_LOADSTORE_TILE_BUFFER_TILING) | + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR); cl_reloc_hindex(&vc4->rcl, depth_hindex, zsurf->offset | ((end_of_frame && From 6dd55b49090da22d3a8e9226507a95e914eaf10f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 12 Jun 2015 12:47:47 -0700 Subject: [PATCH 679/834] vc4: Handle refcounting the exec BO like we do in the kernel. This reduces the diff to the kernel, and will be useful when I make the kernel allocate more BOs as part of validation. --- src/gallium/drivers/vc4/kernel/vc4_drv.h | 5 +++++ src/gallium/drivers/vc4/kernel/vc4_gem.c | 3 +++ src/gallium/drivers/vc4/vc4_simulator.c | 16 ++++++++++----- .../drivers/vc4/vc4_simulator_validate.h | 20 ++++++++++++++----- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h index dede7162c42..8e9230b8949 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_drv.h +++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h @@ -52,6 +52,11 @@ struct vc4_exec_info { struct vc4_bo_exec_state *bo; uint32_t bo_count; + /* List of other BOs used in the job that need to be released + * once the job is complete. + */ + struct list_head unref_list; + /* Current unvalidated indices into @bo loaded by the non-hardware * VC4_PACKET_GEM_HANDLES. 
*/ diff --git a/src/gallium/drivers/vc4/kernel/vc4_gem.c b/src/gallium/drivers/vc4/kernel/vc4_gem.c index ac29ab35dbc..e559ddd1d4e 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_gem.c +++ b/src/gallium/drivers/vc4/kernel/vc4_gem.c @@ -114,6 +114,9 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) } #endif + list_addtail(&to_vc4_bo(&exec->exec_bo->base)->unref_head, + &exec->unref_list); + exec->ct0ca = exec->exec_bo->paddr + bin_offset; exec->ct1ca = exec->exec_bo->paddr + render_offset; diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 2f72e722fc5..2e4d8798f8e 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -39,10 +39,11 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo) { struct vc4_context *vc4 = dev->vc4; struct vc4_screen *screen = vc4->screen; - struct drm_gem_cma_object *obj = CALLOC_STRUCT(drm_gem_cma_object); + struct drm_vc4_bo *drm_bo = CALLOC_STRUCT(drm_vc4_bo); + struct drm_gem_cma_object *obj = &drm_bo->base; uint32_t size = align(bo->size, 4096); - obj->bo = bo; + drm_bo->bo = bo; obj->base.size = size; obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next; obj->paddr = simpenrose_hw_addr(obj->vaddr); @@ -94,7 +95,7 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec) { for (int i = 0; i < exec->bo_count; i++) { struct drm_gem_cma_object *obj = exec->bo[i].bo; - struct vc4_bo *bo = obj->bo; + struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo; memcpy(bo->map, obj->vaddr, bo->size); @@ -124,6 +125,7 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) int ret; memset(&exec, 0, sizeof(exec)); + list_inithead(&exec.unref_list); if (ctex && ctex->bo->simulator_winsys_map) { #if 0 @@ -176,8 +178,12 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) if (ret) return ret; - vc4_bo_unreference(&exec.exec_bo->bo); - free(exec.exec_bo); + 
list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec.unref_list, + unref_head) { + list_del(&bo->unref_head); + vc4_bo_unreference(&bo->bo); + free(bo); + } if (ctex && ctex->bo->simulator_winsys_map) { for (int y = 0; y < ctex->base.b.height0; y++) { diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h index a1903269a20..c3b7a638f93 100644 --- a/src/gallium/drivers/vc4/vc4_simulator_validate.h +++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h @@ -64,16 +64,26 @@ struct drm_device { uint32_t simulator_mem_next; }; -struct drm_gem_cma_object { - struct vc4_bo *bo; +struct drm_gem_object { + uint32_t size; +}; - struct { - uint32_t size; - } base; +struct drm_gem_cma_object { + struct drm_gem_object base; uint32_t paddr; void *vaddr; }; +struct drm_vc4_bo { + struct drm_gem_cma_object base; + struct vc4_bo *bo; + struct list_head unref_head; +}; + +static inline struct drm_vc4_bo *to_vc4_bo(struct drm_gem_object *obj) +{ + return (struct drm_vc4_bo *)obj; +} struct drm_gem_cma_object * drm_gem_cma_create(struct drm_device *dev, size_t size); From 596532cc7d477671f87116e0788b4214ae1d0559 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Jun 2015 11:41:06 -0700 Subject: [PATCH 680/834] vc4: Drop the unused "stride" field of surfaces. We're always looking at the slice anyway, when we would have needed it. 
--- src/gallium/drivers/vc4/vc4_resource.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h index a81c4704d54..ab8f5d3cd55 100644 --- a/src/gallium/drivers/vc4/vc4_resource.h +++ b/src/gallium/drivers/vc4/vc4_resource.h @@ -45,7 +45,6 @@ struct vc4_resource_slice { struct vc4_surface { struct pipe_surface base; uint32_t offset; - uint32_t stride; uint8_t tiling; }; From 2eac356467cef898ed05d0699077d9a9f4fa9156 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Jun 2015 14:54:26 -0700 Subject: [PATCH 681/834] vc4: Factor out the live clamp register getter. --- .../drivers/vc4/kernel/vc4_validate_shaders.c | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c index e5a75c5f8c2..2e727a4425b 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c @@ -77,6 +77,24 @@ waddr_to_live_reg_index(uint32_t waddr, bool is_b) } } +static uint32_t +raddr_add_a_to_live_reg_index(uint64_t inst) +{ + uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); + uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); + uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); + + if (add_a == QPU_MUX_A) { + return raddr_a; + } else if (add_a == QPU_MUX_B) { + return 32 + raddr_b; + } else if (add_a <= QPU_MUX_R4) { + return 64 + add_a; + } else { + return ~0; + } +} + static bool is_tmu_submit(uint32_t waddr) { @@ -136,9 +154,8 @@ check_tmu_write(uint64_t inst, uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); if (is_direct) { - uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); - uint32_t clamp_offset = ~0; + uint32_t clamp_reg, clamp_offset; if (sig == QPU_SIG_SMALL_IMM) { DRM_ERROR("direct TMU read used small immediate\n"); @@ -159,14 +176,13 @@ 
check_tmu_write(uint64_t inst, * This is arbitrary, but simpler than supporting flipping the * two either way. */ - if (add_a == QPU_MUX_A) { - clamp_offset = validation_state->live_clamp_offsets[raddr_a]; - } else if (add_a == QPU_MUX_B) { - clamp_offset = validation_state->live_clamp_offsets[32 + raddr_b]; - } else if (add_a <= QPU_MUX_R4) { - clamp_offset = validation_state->live_clamp_offsets[64 + add_a]; + clamp_reg = raddr_add_a_to_live_reg_index(inst); + if (clamp_reg == ~0) { + DRM_ERROR("direct TMU load wasn't clamped\n"); + return false; } + clamp_offset = validation_state->live_clamp_offsets[clamp_reg]; if (clamp_offset == ~0) { DRM_ERROR("direct TMU load wasn't clamped\n"); return false; From 507f3e708cbd10a4272aeffa0f066f1a80b48239 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 16 Jun 2015 12:03:10 -0700 Subject: [PATCH 682/834] vc4: R4 is not a valid register for clamped direct texturing. Our array only goes to R3, and R4 is a special case that shouldn't be used. --- src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c index 2e727a4425b..ba1ae0a1925 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c @@ -88,7 +88,7 @@ raddr_add_a_to_live_reg_index(uint64_t inst) return raddr_a; } else if (add_a == QPU_MUX_B) { return 32 + raddr_b; - } else if (add_a <= QPU_MUX_R4) { + } else if (add_a <= QPU_MUX_R3) { return 64 + add_a; } else { return ~0; From d4d27361499cac73da4716b571519ecb71cef551 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Jun 2015 17:47:12 -0700 Subject: [PATCH 683/834] vc4: Swap around which src we spill to ra31/rb31. I wanted to assert that src1 came from a non-unspilled register in shader validation, and this easily gets us that. 
And, as a bonus: total instructions in shared programs: 93347 -> 92723 (-0.67%) instructions in affected programs: 60524 -> 59900 (-1.03%) --- src/gallium/drivers/vc4/vc4_qpu_emit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 577eb9200f4..99afe4b8798 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -117,11 +117,11 @@ fixup_raddr_conflict(struct vc4_compile *c, return; if (mux0 == QPU_MUX_A) { - queue(c, qpu_a_MOV(qpu_rb(31), *src1)); - *src1 = qpu_rb(31); + queue(c, qpu_a_MOV(qpu_rb(31), *src0)); + *src0 = qpu_rb(31); } else { - queue(c, qpu_a_MOV(qpu_ra(31), *src1)); - *src1 = qpu_ra(31); + queue(c, qpu_a_MOV(qpu_ra(31), *src0)); + *src0 = qpu_ra(31); } } From a0cd1a4060fdb55a57609b460629c7059bbe7047 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Jun 2015 15:05:36 -0700 Subject: [PATCH 684/834] vc4: Make sure that direct texture clamps have a minimum value of 0. I was thinking of the MIN opcode in terms of unsigned math, but it's signed, so if you used a negative array index, you could read before the UBO. Fixes segfaults under simulation in piglit array indexing tests with mprotect-based guard pages. --- .../drivers/vc4/kernel/vc4_validate_shaders.c | 88 +++++++++++++------ src/gallium/drivers/vc4/vc4_program.c | 3 + 2 files changed, 66 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c index ba1ae0a1925..ab9a6512e82 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c @@ -58,7 +58,8 @@ struct vc4_shader_validation_state { * * This is used for the validation of direct address memory reads. 
*/ - uint32_t live_clamp_offsets[32 + 32 + 4]; + uint32_t live_min_clamp_offsets[32 + 32 + 4]; + bool live_max_clamp_regs[32 + 32 + 4]; }; static uint32_t @@ -80,13 +81,14 @@ waddr_to_live_reg_index(uint32_t waddr, bool is_b) static uint32_t raddr_add_a_to_live_reg_index(uint64_t inst) { + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); if (add_a == QPU_MUX_A) { return raddr_a; - } else if (add_a == QPU_MUX_B) { + } else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) { return 32 + raddr_b; } else if (add_a <= QPU_MUX_R3) { return 64 + add_a; @@ -182,7 +184,7 @@ check_tmu_write(uint64_t inst, return false; } - clamp_offset = validation_state->live_clamp_offsets[clamp_reg]; + clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg]; if (clamp_offset == ~0) { DRM_ERROR("direct TMU load wasn't clamped\n"); return false; @@ -245,8 +247,6 @@ check_register_write(uint64_t inst, uint32_t waddr = (is_mul ? QPU_GET_FIELD(inst, QPU_WADDR_MUL) : QPU_GET_FIELD(inst, QPU_WADDR_ADD)); - bool is_b = is_mul != ((inst & QPU_WS) != 0); - uint32_t live_reg_index; switch (waddr) { case QPU_W_UNIFORMS_ADDRESS: @@ -301,14 +301,6 @@ check_register_write(uint64_t inst, return true; } - /* Clear out the live offset clamp tracking for the written register. - * If this particular instruction is setting up an offset clamp, it'll - * get tracked immediately after we return. 
- */ - live_reg_index = waddr_to_live_reg_index(waddr, is_b); - if (live_reg_index != ~0) - validation_state->live_clamp_offsets[live_reg_index] = ~0; - return true; } @@ -317,26 +309,72 @@ track_live_clamps(uint64_t inst, struct vc4_validated_shader_info *validated_shader, struct vc4_shader_validation_state *validation_state) { + uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); + uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); + uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); + uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); - bool is_b = inst & QPU_WS; - uint32_t live_reg_index; + bool ws = inst & QPU_WS; + uint32_t lri_add_a, lri_add, lri_mul; + bool add_a_is_min_0; - if (QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_MIN) - return; + /* Check whether OP_ADD's A argument comes from a live MAX(x, 0), + * before we clear previous live state. + */ + lri_add_a = raddr_add_a_to_live_reg_index(inst); + add_a_is_min_0 = (lri_add_a != ~0 && + validation_state->live_max_clamp_regs[lri_add_a]); - if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && - !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF && - sig != QPU_SIG_SMALL_IMM)) { + /* Clear live state for registers written by our instruction. */ + lri_add = waddr_to_live_reg_index(waddr_add, ws); + lri_mul = waddr_to_live_reg_index(waddr_mul, !ws); + if (lri_mul != ~0) { + validation_state->live_max_clamp_regs[lri_mul] = false; + validation_state->live_min_clamp_offsets[lri_mul] = ~0; + } + if (lri_add != ~0) { + validation_state->live_max_clamp_regs[lri_add] = false; + validation_state->live_min_clamp_offsets[lri_add] = ~0; + } else { + /* Nothing further to do for live tracking, since only ADDs + * generate new live clamp registers.
+ */ return; } - live_reg_index = waddr_to_live_reg_index(waddr_add, is_b); - if (live_reg_index != ~0) { - validation_state->live_clamp_offsets[live_reg_index] = + /* Now, handle remaining live clamp tracking for the ADD operation. */ + + if (cond_add != QPU_COND_ALWAYS) + return; + + if (op_add == QPU_A_MAX) { + /* Track live clamps of a value to a minimum of 0 (in either + * arg). + */ + if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 || + (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) { + return; + } + + validation_state->live_max_clamp_regs[lri_add] = true; + } if (op_add == QPU_A_MIN) { + /* Track live clamps of a value clamped to a minimum of 0 and + * a maximum of some uniform's offset. + */ + if (!add_a_is_min_0) + return; + + if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && + !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF && + sig != QPU_SIG_SMALL_IMM)) { + return; + } + + validation_state->live_min_clamp_offsets[lri_add] = validated_shader->uniforms_size; } } @@ -398,8 +436,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) for (i = 0; i < 8; i++) validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0; - for (i = 0; i < ARRAY_SIZE(validation_state.live_clamp_offsets); i++) - validation_state.live_clamp_offsets[i] = ~0; + for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) + validation_state.live_min_clamp_offsets[i] = ~0; shader = shader_obj->vaddr; max_ip = shader_obj->base.size / sizeof(uint64_t); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 91540cfe2fa..bb45eb1288e 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -147,6 +147,9 @@ indirect_uniform_load(struct vc4_compile *c, indirect_offset = qir_ADD(c, indirect_offset, qir_uniform_ui(c, (range->dst_offset + offset))); + + /* Clamp to [0, array size). Note that MIN/MAX are signed. 
*/ + indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0)); indirect_offset = qir_MIN(c, indirect_offset, qir_uniform_ui(c, (range->dst_offset + range->size - 4))); From da81999bee7b1f1bc0bb296e903deb03617ae22c Mon Sep 17 00:00:00 2001 From: Kevin Rogovin Date: Wed, 17 Jun 2015 13:29:50 +0300 Subject: [PATCH 685/834] mesa: Define infrastructure for ARB_framebuffer_no_attachments Define the infrastructure for the extension GL_ARB_framebuffer_no_attachments: - extension table - additions to gl_framebuffer Reviewed-by: Ian Romanick Signed-off-by: Kevin Rogovin --- src/mesa/main/extensions.c | 1 + src/mesa/main/fbobject.c | 1 + src/mesa/main/framebuffer.c | 1 + src/mesa/main/mtypes.h | 48 +++++++++++++++++++++++++++++++++---- 4 files changed, 46 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index f9bf503a066..4176a69ed7c 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -117,6 +117,7 @@ static const struct extension extension_table[] = { { "GL_ARB_fragment_program", o(ARB_fragment_program), GLL, 2002 }, { "GL_ARB_fragment_program_shadow", o(ARB_fragment_program_shadow), GLL, 2003 }, { "GL_ARB_fragment_shader", o(ARB_fragment_shader), GL, 2002 }, + { "GL_ARB_framebuffer_no_attachments", o(ARB_framebuffer_no_attachments), GL, 2012 }, { "GL_ARB_framebuffer_object", o(ARB_framebuffer_object), GL, 2005 }, { "GL_ARB_framebuffer_sRGB", o(EXT_framebuffer_sRGB), GL, 1998 }, { "GL_ARB_get_program_binary", o(dummy_true), GL, 2010 }, diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index c5a702636a8..498edfb564c 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -957,6 +957,7 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, fb->Height = 0; fb->_AllColorBuffersFixedPoint = GL_TRUE; fb->_HasSNormOrFloatColorBuffer = GL_FALSE; + fb->_HasAttachments = true; /* Start at -2 to more easily loop over all attachment points. 
* -2: depth buffer diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 665a5ba1492..f49d74c89f0 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -157,6 +157,7 @@ _mesa_initialize_window_framebuffer(struct gl_framebuffer *fb, fb->_Status = GL_FRAMEBUFFER_COMPLETE_EXT; fb->_AllColorBuffersFixedPoint = !visual->floatMode; fb->_HasSNormOrFloatColorBuffer = visual->floatMode; + fb->_HasAttachments = true; compute_depth_max(fb); } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 205c7d2fbee..e9f70e20612 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3133,12 +3133,29 @@ struct gl_framebuffer */ struct gl_config Visual; - GLuint Width, Height; /**< size of frame buffer in pixels */ + /** + * Size of frame buffer in pixels. If there are no attachments, then both + * of these are 0. + */ + GLuint Width, Height; - /** \name Drawing bounds (Intersection of buffer size and scissor box) */ + /** + * In the case that the framebuffer has no attachment (i.e. + * GL_ARB_framebuffer_no_attachments) then the geometry of + * the framebuffer is specified by the default values. 
+ */ + struct { + GLuint Width, Height, Layers, NumSamples; + GLboolean FixedSampleLocations; + } DefaultGeometry; + + /** \name Drawing bounds (Intersection of buffer size and scissor box) + * The drawing region is given by [_Xmin, _Xmax) x [_Ymin, _Ymax), + * (inclusive for _Xmin and _Ymin while exclusive for _Xmax and _Ymax) + */ /*@{*/ - GLint _Xmin, _Xmax; /**< inclusive */ - GLint _Ymin, _Ymax; /**< exclusive */ + GLint _Xmin, _Xmax; + GLint _Ymin, _Ymax; /*@}*/ /** \name Derived Z buffer stuff */ @@ -3151,6 +3168,16 @@ struct gl_framebuffer /** One of the GL_FRAMEBUFFER_(IN)COMPLETE_* tokens */ GLenum _Status; + /** Whether one of Attachment has Type != GL_NONE + * NOTE: the values for Width and Height are set to 0 in case of having + * no attachments, a backend driver supporting the extension + * GL_ARB_framebuffer_no_attachments must check for the flag _HasAttachments + * and if GL_FALSE, must then use the values in DefaultGeometry to initialize + * its viewport, scissor and so on (in particular _Xmin, _Xmax, _Ymin and + * _Ymax do NOT take into account _HasAttachments being false) + */ + bool _HasAttachments; + /** Integer color values */ GLboolean _IntegerColor; @@ -3161,7 +3188,9 @@ struct gl_framebuffer /** * The maximum number of layers in the framebuffer, or 0 if the framebuffer * is not layered. For cube maps and cube map arrays, each cube face - * counts as a layer. + * counts as a layer. 
As the case for Width, Height a backend driver + * supporting GL_ARB_framebuffer_no_attachments must use DefaultGeometry + * in the case that _HasAttachments is false */ GLuint MaxNumLayers; @@ -3340,6 +3369,14 @@ struct gl_constants GLuint MaxRenderbufferSize; /**< GL_EXT_framebuffer_object */ GLuint MaxSamples; /**< GL_ARB_framebuffer_object */ + /** + * GL_ARB_framebuffer_no_attachments + */ + GLuint MaxFramebufferWidth; + GLuint MaxFramebufferHeight; + GLuint MaxFramebufferLayers; + GLuint MaxFramebufferSamples; + /** Number of varying vectors between any two shader stages. */ GLuint MaxVarying; @@ -3617,6 +3654,7 @@ struct gl_extensions GLboolean ARB_fragment_program; GLboolean ARB_fragment_program_shadow; GLboolean ARB_fragment_shader; + GLboolean ARB_framebuffer_no_attachments; GLboolean ARB_framebuffer_object; GLboolean ARB_explicit_attrib_location; GLboolean ARB_explicit_uniform_location; From c9d26f201aca58c72629d1ba1bb13c32c158d9dd Mon Sep 17 00:00:00 2001 From: Kevin Rogovin Date: Wed, 17 Jun 2015 13:29:51 +0300 Subject: [PATCH 686/834] mesa: Constants and functions for ARB_framebuffer_no_attachments Define the enumeration constants, function entry points and glGet for the GL_ARB_framebuffer_no_attachments. 
Reviewed-by: Ian Romanick Signed-off-by: Kevin Rogovin --- .../gen/ARB_framebuffer_no_attachments.xml | 32 +++++++++++++++++++ src/mapi/glapi/gen/Makefile.am | 1 + src/mapi/glapi/gen/apiexec.py | 5 +++ src/mapi/glapi/gen/gl_API.xml | 4 ++- src/mesa/main/fbobject.c | 28 ++++++++++++++++ src/mesa/main/fbobject.h | 6 ++++ src/mesa/main/get.c | 1 + src/mesa/main/get_hash_params.py | 6 ++++ src/mesa/main/tests/dispatch_sanity.cpp | 4 +-- 9 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml diff --git a/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml new file mode 100644 index 00000000000..59839a044be --- /dev/null +++ b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am index 34602579c8a..5b163b02e00 100644 --- a/src/mapi/glapi/gen/Makefile.am +++ b/src/mapi/glapi/gen/Makefile.am @@ -131,6 +131,7 @@ API_XML = \ ARB_draw_instanced.xml \ ARB_ES2_compatibility.xml \ ARB_ES3_compatibility.xml \ + ARB_framebuffer_no_attachments.xml \ ARB_framebuffer_object.xml \ ARB_geometry_shader4.xml \ ARB_get_program_binary.xml \ diff --git a/src/mapi/glapi/gen/apiexec.py b/src/mapi/glapi/gen/apiexec.py index 535de8a434b..b623b44beeb 100644 --- a/src/mapi/glapi/gen/apiexec.py +++ b/src/mapi/glapi/gen/apiexec.py @@ -138,6 +138,11 @@ functions = { # with OpenGL 3.1. "TexBufferRange": exec_info(core=31), + # OpenGL 4.3 / GL_ARB_framebuffer_no_attachments. Mesa can expose the + # extension with OpenGL 3.0. + "FramebufferParameteri": exec_info(compatibility=30, core=31), + "GetFramebufferParameteri": exec_info(compatibility=30, core=31), + # OpenGL 4.5 / GL_ARB_direct_state_access. Mesa can expose the extension # with core profile. 
"CreateTransformFeedbacks": exec_info(core=31), diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index bd8db62033e..2f330756f22 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -8188,7 +8188,9 @@ - + + + diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 498edfb564c..6d75209c6a5 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1335,6 +1335,34 @@ _mesa_BindRenderbufferEXT(GLenum target, GLuint renderbuffer) bind_renderbuffer(target, renderbuffer, true); } +void GLAPIENTRY +_mesa_FramebufferParameteri(GLenum target, GLenum pname, GLint param) +{ + GET_CURRENT_CONTEXT(ctx); + + (void) target; + (void) pname; + (void) param; + + _mesa_error(ctx, GL_INVALID_OPERATION, + "glFramebufferParameteri not supported " + "(ARB_framebuffer_no_attachments not implemented)"); +} + +void GLAPIENTRY +_mesa_GetFramebufferParameteriv(GLenum target, GLenum pname, GLint *params) +{ + GET_CURRENT_CONTEXT(ctx); + + (void) target; + (void) pname; + (void) param; + + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetFramebufferParameteriv not supported " + "(ARB_framebuffer_no_attachments not implemented)"); +} + /** * Remove the specified renderbuffer or texture from any attachment point in diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 9f570db3a26..8dad0ff34e7 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -288,4 +288,10 @@ extern void GLAPIENTRY _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments, const GLenum *attachments); +extern void GLAPIENTRY +_mesa_FramebufferParameteri(GLenum target, GLenum pname, GLint param); + +extern void GLAPIENTRY +_mesa_GetFramebufferParameteriv(GLenum target, GLenum pname, GLint *params); + #endif /* FBOBJECT_H */ diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 1bc9b5d82cf..3d6d63916b3 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -400,6 +400,7 @@ 
EXTRA_EXT(INTEL_performance_query); EXTRA_EXT(ARB_explicit_uniform_location); EXTRA_EXT(ARB_clip_control); EXTRA_EXT(EXT_polygon_offset_clamp); +EXTRA_EXT(ARB_framebuffer_no_attachments); static const int extra_ARB_color_buffer_float_or_glcore[] = { diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 513d5d21b3f..84c5aa31a68 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -804,6 +804,12 @@ descriptor=[ [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ], + +# GL_ARB_framebuffer_no_attachments + ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"], + ["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"], + ["MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments"], + ["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"], ]}, # Enums restricted to OpenGL Core profile diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 0b7262a21e7..800720b798e 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -823,8 +823,8 @@ const struct function common_desktop_functions_possible[] = { // { "glVertexArrayVertexAttribIFormatEXT", 43, -1 }, // XXX: Add to xml // { "glVertexArrayVertexAttribBindingEXT", 43, -1 }, // XXX: Add to xml // { "glVertexArrayVertexBindingDivisorEXT", 43, -1 }, // XXX: Add to xml -// { "glFramebufferParameteri", 43, -1 }, // XXX: Add to xml -// { "glGetFramebufferParameteriv", 43, -1 }, // 
XXX: Add to xml + { "glFramebufferParameteri", 43, -1 }, + { "glGetFramebufferParameteriv", 43, -1 }, // { "glNamedFramebufferParameteriEXT", 43, -1 }, // XXX: Add to xml // { "glGetNamedFramebufferParameterivEXT", 43, -1 }, // XXX: Add to xml // { "glGetInternalformati64v", 43, -1 }, // XXX: Add to xml From 6aa12994bdf0068a9804204a8f1b197cc0f46ec6 Mon Sep 17 00:00:00 2001 From: Kevin Rogovin Date: Wed, 17 Jun 2015 13:29:52 +0300 Subject: [PATCH 687/834] PATCH 03/10] mesa: Complete ARB_framebuffer_no_attachments in Mesa core Implement GL_ARB_framebuffer_no_attachments in Mesa core - changes to conditions for framebuffer completeness - implement set/get functions for framebuffers for new functions in GL_ARB_framebuffer_no_attachments Reviewed-by: Ian Romanick Signed-off-by: Kevin Rogovin --- src/mesa/main/fbobject.c | 211 +++++++++++++++++++++++++++++++++------ 1 file changed, 182 insertions(+), 29 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 6d75209c6a5..f8dcf122d99 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1156,14 +1156,48 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, } else if (att_layer_count > max_layer_count) { max_layer_count = att_layer_count; } + + /* + * The extension GL_ARB_framebuffer_no_attachments places additional + * requirement on each attachment. Those additional requirements are + * tighter than those of previous versions of GL. In interest of better + * compatibility, we will not enforce these restrictions. For the record + * those additional restrictions are quoted below: + * + * "The width and height of image are greater than zero and less than or + * equal to the values of the implementation-dependent limits + * MAX_FRAMEBUFFER_WIDTH and MAX_FRAMEBUFFER_HEIGHT, respectively."
+ * + * "If <image> is a three-dimensional texture or a one- or two-dimensional + * array texture and the attachment is layered, the depth or layer count + * of the texture is less than or equal to the implementation-dependent + * limit MAX_FRAMEBUFFER_LAYERS." + * + * "If image has multiple samples, its sample count is less than or equal + * to the value of the implementation-dependent limit + * MAX_FRAMEBUFFER_SAMPLES." + * + * The same requirements are also in place for GL 4.5, + * Section 9.4.1 "Framebuffer Attachment Completeness", pg 310-311 + */ } fb->MaxNumLayers = max_layer_count; if (numImages == 0) { - fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; - fbo_incomplete(ctx, "no attachments", -1); - return; + fb->_HasAttachments = false; + + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; + fbo_incomplete(ctx, "no attachments", -1); + return; + } + + if (fb->DefaultGeometry.Width == 0 || fb->DefaultGeometry.Height == 0) { + fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; + fbo_incomplete(ctx, "no attachments and default width or height is 0", -1); + return; + } } if (_mesa_is_desktop_gl(ctx) && !ctx->Extensions.ARB_ES2_compatibility) { @@ -1228,8 +1262,10 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, * renderbuffers/textures are different sizes, the framebuffer * width/height will be set to the smallest width/height.
*/ - fb->Width = minWidth; - fb->Height = minHeight; + if (numImages != 0) { + fb->Width = minWidth; + fb->Height = minHeight; + } /* finally, update the visual info for the framebuffer */ _mesa_update_framebuffer_visual(ctx, fb); @@ -1335,32 +1371,129 @@ _mesa_BindRenderbufferEXT(GLenum target, GLuint renderbuffer) bind_renderbuffer(target, renderbuffer, true); } +static void +framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum pname, GLint param, const char *func) +{ + switch (pname) { + case GL_FRAMEBUFFER_DEFAULT_WIDTH: + if (param < 0 || param > ctx->Const.MaxFramebufferWidth) + _mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.Width = param; + break; + case GL_FRAMEBUFFER_DEFAULT_HEIGHT: + if (param < 0 || param > ctx->Const.MaxFramebufferHeight) + _mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.Height = param; + break; + case GL_FRAMEBUFFER_DEFAULT_LAYERS: + if (param < 0 || param > ctx->Const.MaxFramebufferLayers) + _mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.Layers = param; + break; + case GL_FRAMEBUFFER_DEFAULT_SAMPLES: + if (param < 0 || param > ctx->Const.MaxFramebufferSamples) + _mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.NumSamples = param; + break; + case GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS: + fb->DefaultGeometry.FixedSampleLocations = param; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, + "%s(pname=0x%x)", func, pname); + } +} + void GLAPIENTRY _mesa_FramebufferParameteri(GLenum target, GLenum pname, GLint param) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; - (void) target; - (void) pname; - (void) param; + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glFramebufferParameteriv not supported " + "(ARB_framebuffer_no_attachments not implemented)"); + return; + } - _mesa_error(ctx, GL_INVALID_OPERATION, - 
"glFramebufferParameteri not supported " - "(ARB_framebuffer_no_attachments not implemented)"); + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferParameteri(target=0x%x)", target); + return; + } + + /* check framebuffer binding */ + if (_mesa_is_winsys_fbo(fb)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glFramebufferParameteri"); + return; + } + + framebuffer_parameteri(ctx, fb, pname, param, "glFramebufferParameteri"); +} + +static void +get_framebuffer_parameteriv(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum pname, GLint *params, const char *func) +{ + switch (pname) { + case GL_FRAMEBUFFER_DEFAULT_WIDTH: + *params = fb->DefaultGeometry.Width; + break; + case GL_FRAMEBUFFER_DEFAULT_HEIGHT: + *params = fb->DefaultGeometry.Height; + break; + case GL_FRAMEBUFFER_DEFAULT_LAYERS: + *params = fb->DefaultGeometry.Layers; + break; + case GL_FRAMEBUFFER_DEFAULT_SAMPLES: + *params = fb->DefaultGeometry.NumSamples; + break; + case GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS: + *params = fb->DefaultGeometry.FixedSampleLocations; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, + "%s(pname=0x%x)", func, pname); + } } void GLAPIENTRY _mesa_GetFramebufferParameteriv(GLenum target, GLenum pname, GLint *params) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; - (void) target; - (void) pname; - (void) param; + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetFramebufferParameteriv not supported " + "(ARB_framebuffer_no_attachments not implemented)"); + return; + } - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetFramebufferParameteriv not supported " - "(ARB_framebuffer_no_attachments not implemented)"); + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetFramebufferParameteriv(target=0x%x)", target); + return; + } + + /* check framebuffer binding */ + if (_mesa_is_winsys_fbo(fb)) { + 
_mesa_error(ctx, GL_INVALID_OPERATION, + "glGetFramebufferParameteriv"); + return; + } + + get_framebuffer_parameteriv(ctx, fb, pname, params, + "glGetFramebufferParameteriv"); } @@ -3704,14 +3837,22 @@ _mesa_NamedFramebufferParameteri(GLuint framebuffer, GLenum pname, GLint param) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb = NULL; - (void) framebuffer; - (void) pname; - (void) param; + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedFramebufferParameteri(" + "ARB_framebuffer_no_attachments not implemented)"); + return; + } - _mesa_error(ctx, GL_INVALID_OPERATION, - "glNamedFramebufferParameteri not supported " - "(ARB_framebuffer_no_attachments not implemented)"); + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferParameteri"); + + if (fb) { + framebuffer_parameteri(ctx, fb, pname, param, + "glNamedFramebufferParameteriv"); + } } @@ -3720,14 +3861,26 @@ _mesa_GetNamedFramebufferParameteriv(GLuint framebuffer, GLenum pname, GLint *param) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; - (void) framebuffer; - (void) pname; - (void) param; + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedFramebufferParameteriv(" + "ARB_framebuffer_no_attachments not implemented)"); + return; + } - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetNamedFramebufferParameteriv not supported " - "(ARB_framebuffer_no_attachments not implemented)"); + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glGetNamedFramebufferParameteriv"); + } else { + fb = ctx->WinSysDrawBuffer; + } + + if (fb) { + get_framebuffer_parameteriv(ctx, fb, pname, param, + "glGetNamedFramebufferParameteriv"); + } } From 74987977a36a7111281e8fb53568dc05dbd3a8b4 Mon Sep 17 00:00:00 2001 From: Kevin Rogovin Date: Wed, 17 Jun 2015 13:29:53 +0300 Subject: [PATCH 688/834] mesa: add helper functions for geometry of gl_framebuffer Add 
convenience helper functions for fetching geometry of gl_framebuffer that return the geometry of the gl_framebuffer instead of the geometry of the buffers of the gl_framebuffer when the gl_framebuffer has no attachments. Reviewed-by: Ian Romanick Signed-off-by: Kevin Rogovin --- src/mesa/main/framebuffer.h | 28 ++++++++++++++++++++++++++++ src/mesa/main/mtypes.h | 8 +++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index d02b86f20d9..ca286e9a992 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -76,6 +76,34 @@ _mesa_scissor_bounding_box(const struct gl_context *ctx, const struct gl_framebuffer *buffer, unsigned idx, int *bbox); +static inline GLuint +_mesa_geometric_width(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? + buffer->Width : buffer->DefaultGeometry.Width; +} + +static inline GLuint +_mesa_geometric_height(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? + buffer->Height : buffer->DefaultGeometry.Height; +} + +static inline GLuint +_mesa_geometric_samples(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? + buffer->Visual.samples : buffer->DefaultGeometry.NumSamples; +} + +static inline GLuint +_mesa_geometric_layers(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ?
+ buffer->MaxNumLayers : buffer->DefaultGeometry.Layers; +} + extern void _mesa_update_draw_buffer_bounds(struct gl_context *ctx, struct gl_framebuffer *drawFb); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index e9f70e20612..a10e49434bc 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3174,7 +3174,13 @@ struct gl_framebuffer * GL_ARB_framebuffer_no_attachments must check for the flag _HasAttachments * and if GL_FALSE, must then use the values in DefaultGeometry to initialize * its viewport, scissor and so on (in particular _Xmin, _Xmax, _Ymin and - * _Ymax do NOT take into account _HasAttachments being false) + * _Ymax do NOT take into account _HasAttachments being false). To get the + * geometry of the framebuffer, the helper functions + * _mesa_geometric_width(), + * _mesa_geometric_height(), + * _mesa_geometric_samples() and + * _mesa_geometric_layers() + * are available that check _HasAttachments. */ bool _HasAttachments; From 51f4b51151cb08988b5de466f3c2348876784cc5 Mon Sep 17 00:00:00 2001 From: Kevin Rogovin Date: Wed, 17 Jun 2015 13:29:54 +0300 Subject: [PATCH 689/834] mesa: helper function for scissor box of gl_framebuffer Add helper convenience function that intersects the scissor values against a passed bounding box. In addition, to avoid replicated code, make the function _mesa_scissor_bounding_box() use this new function. Reviewed-by: Ian Romanick Signed-off-by: Kevin Rogovin --- src/mesa/main/framebuffer.c | 59 ++++++++++++++++++++++++------------- src/mesa/main/framebuffer.h | 3 ++ 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index f49d74c89f0..77c04b8dab8 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -357,6 +357,43 @@ update_framebuffer_size(struct gl_context *ctx, struct gl_framebuffer *fb) } + +/** + * Given a bounding box, intersect the bounding box with the scissor of + * a specified viewport.
+ * + * \param ctx GL context. + * \param idx Index of the desired viewport + * \param bbox Bounding box for the scissored viewport. Stored as xmin, + * xmax, ymin, ymax. + */ +void +_mesa_intersect_scissor_bounding_box(const struct gl_context *ctx, + unsigned idx, int *bbox) +{ + if (ctx->Scissor.EnableFlags & (1u << idx)) { + if (ctx->Scissor.ScissorArray[idx].X > bbox[0]) { + bbox[0] = ctx->Scissor.ScissorArray[idx].X; + } + if (ctx->Scissor.ScissorArray[idx].Y > bbox[2]) { + bbox[2] = ctx->Scissor.ScissorArray[idx].Y; + } + if (ctx->Scissor.ScissorArray[idx].X + ctx->Scissor.ScissorArray[idx].Width < bbox[1]) { + bbox[1] = ctx->Scissor.ScissorArray[idx].X + ctx->Scissor.ScissorArray[idx].Width; + } + if (ctx->Scissor.ScissorArray[idx].Y + ctx->Scissor.ScissorArray[idx].Height < bbox[3]) { + bbox[3] = ctx->Scissor.ScissorArray[idx].Y + ctx->Scissor.ScissorArray[idx].Height; + } + /* finally, check for empty region */ + if (bbox[0] > bbox[1]) { + bbox[0] = bbox[1]; + } + if (bbox[2] > bbox[3]) { + bbox[2] = bbox[3]; + } + } +} + /** * Calculate the inclusive bounding box for the scissor of a specific viewport * @@ -381,27 +418,7 @@ _mesa_scissor_bounding_box(const struct gl_context *ctx, bbox[1] = buffer->Width; bbox[3] = buffer->Height; - if (ctx->Scissor.EnableFlags & (1u << idx)) { - if (ctx->Scissor.ScissorArray[idx].X > bbox[0]) { - bbox[0] = ctx->Scissor.ScissorArray[idx].X; - } - if (ctx->Scissor.ScissorArray[idx].Y > bbox[2]) { - bbox[2] = ctx->Scissor.ScissorArray[idx].Y; - } - if (ctx->Scissor.ScissorArray[idx].X + ctx->Scissor.ScissorArray[idx].Width < bbox[1]) { - bbox[1] = ctx->Scissor.ScissorArray[idx].X + ctx->Scissor.ScissorArray[idx].Width; - } - if (ctx->Scissor.ScissorArray[idx].Y + ctx->Scissor.ScissorArray[idx].Height < bbox[3]) { - bbox[3] = ctx->Scissor.ScissorArray[idx].Y + ctx->Scissor.ScissorArray[idx].Height; - } - /* finally, check for empty region */ - if (bbox[0] > bbox[1]) { - bbox[0] = bbox[1]; - } - if (bbox[2] > bbox[3]) { - 
bbox[2] = bbox[3]; - } - } + _mesa_intersect_scissor_bounding_box(ctx, idx, bbox); assert(bbox[0] <= bbox[1]); assert(bbox[2] <= bbox[3]); diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index ca286e9a992..08e43222045 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -75,6 +75,9 @@ extern void _mesa_scissor_bounding_box(const struct gl_context *ctx, const struct gl_framebuffer *buffer, unsigned idx, int *bbox); +extern void +_mesa_intersect_scissor_bounding_box(const struct gl_context *ctx, + unsigned idx, int *bbox); static inline GLuint _mesa_geometric_width(const struct gl_framebuffer *buffer) From 41b6db225f42a5d81beec1b4455ec7b504e2416d Mon Sep 17 00:00:00 2001 From: Kevin Rogovin Date: Wed, 17 Jun 2015 13:29:55 +0300 Subject: [PATCH 690/834] i965: Use _mesa_geometric_ functions appropriately Change references to gl_framebuffer::Width, Height, MaxNumLayers and Visual::samples to use the _mesa_geometry_ convenience functions for those places where the geometry of the gl_framebuffer is needed (in contrast to the geometry of the intersection of the attachments of the gl_framebuffer). This patch is to pave the way to enable GL_ARB_framebuffer_no_attachments on Gen7 and higher in i965. 
Reviewed-by: Ian Romanick Signed-off-by: Kevin Rogovin --- src/mesa/drivers/dri/i965/brw_clip_state.c | 9 ++++++--- src/mesa/drivers/dri/i965/brw_misc_state.c | 9 ++++++--- src/mesa/drivers/dri/i965/brw_sf_state.c | 6 ++++++ src/mesa/drivers/dri/i965/brw_state_upload.c | 6 ++++-- src/mesa/drivers/dri/i965/brw_wm.c | 7 ++++--- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 12 +++++++----- src/mesa/drivers/dri/i965/gen6_clip_state.c | 10 +++++++--- src/mesa/drivers/dri/i965/gen6_multisample_state.c | 3 ++- src/mesa/drivers/dri/i965/gen6_scissor_state.c | 12 +++++++++--- src/mesa/drivers/dri/i965/gen6_sf_state.c | 3 ++- src/mesa/drivers/dri/i965/gen6_viewport_state.c | 5 +++-- src/mesa/drivers/dri/i965/gen6_wm_state.c | 3 ++- src/mesa/drivers/dri/i965/gen7_sf_state.c | 3 ++- src/mesa/drivers/dri/i965/gen7_viewport_state.c | 5 +++-- src/mesa/drivers/dri/i965/gen7_wm_state.c | 3 ++- src/mesa/drivers/dri/i965/gen8_viewport_state.c | 8 +++++--- 16 files changed, 70 insertions(+), 34 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 32238341aae..dee74dba8af 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -32,6 +32,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "main/framebuffer.h" static void upload_clip_vp(struct brw_context *brw) @@ -59,7 +60,9 @@ brw_upload_clip_unit(struct brw_context *brw) struct brw_clip_unit_state *clip; /* _NEW_BUFFERS */ - struct gl_framebuffer *fb = ctx->DrawBuffer; + const struct gl_framebuffer *fb = ctx->DrawBuffer; + const float fb_width = (float)_mesa_geometric_width(fb); + const float fb_height = (float)_mesa_geometric_height(fb); upload_clip_vp(brw); @@ -127,8 +130,8 @@ brw_upload_clip_unit(struct brw_context *brw) /* enable guardband clipping if we can */ if (ctx->ViewportArray[0].X == 0 && ctx->ViewportArray[0].Y == 0 && - ctx->ViewportArray[0].Width == (float) 
fb->Width && - ctx->ViewportArray[0].Height == (float) fb->Height) + ctx->ViewportArray[0].Width == fb_width && + ctx->ViewportArray[0].Height == fb_height) { clip->clip5.guard_band_enable = 1; clip->clip6.clipper_viewport_state_ptr = diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 67a693b5ec1..5a4515b582d 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -39,6 +39,7 @@ #include "brw_state.h" #include "brw_defines.h" +#include "main/framebuffer.h" #include "main/fbobject.h" #include "main/glformats.h" @@ -46,12 +47,14 @@ static void upload_drawing_rect(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + const struct gl_framebuffer *fb = ctx->DrawBuffer; + const unsigned int fb_width = _mesa_geometric_width(fb); + const unsigned int fb_height = _mesa_geometric_height(fb); BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); /* xmin, ymin */ - OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | - ((ctx->DrawBuffer->Height - 1) << 16)); + OUT_BATCH(((fb_width - 1) & 0xffff) | ((fb_height - 1) << 16)); OUT_BATCH(0); ADVANCE_BATCH(); } @@ -767,7 +770,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) * works just fine, and there's no window system to worry about. 
*/ if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) - OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31); + OUT_BATCH((32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31); else OUT_BATCH(0); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 014b43448ad..5d9892214a9 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -52,6 +52,12 @@ static void upload_sf_vp(struct brw_context *brw) sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); + /* Accessing the fields Width and Height of gl_framebuffer to produce the + * values to program the viewport and scissor is fine as long as the + * gl_framebuffer has atleast one attachment. + */ + assert(ctx->DrawBuffer->_HasAttachments); + if (render_to_fbo) { y_scale = 1.0; y_bias = 0; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 84b0861aaad..08d1ac28885 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -41,6 +41,7 @@ #include "brw_gs.h" #include "brw_wm.h" #include "brw_cs.h" +#include "main/framebuffer.h" static const struct brw_tracked_state *gen4_atoms[] = { @@ -660,6 +661,7 @@ brw_upload_pipeline_state(struct brw_context *brw, int i; static int dirty_count = 0; struct brw_state_flags state = brw->state.pipelines[pipeline]; + unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer); brw_select_pipeline(brw, pipeline); @@ -696,8 +698,8 @@ brw_upload_pipeline_state(struct brw_context *brw, brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS; } - if (brw->num_samples != ctx->DrawBuffer->Visual.samples) { - brw->num_samples = ctx->DrawBuffer->Visual.samples; + if (brw->num_samples != fb_samples) { + brw->num_samples = fb_samples; brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c index 45a03bba857..592a72927c3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -36,6 +36,7 @@ #include "main/formats.h" #include "main/fbobject.h" #include "main/samplerobj.h" +#include "main/framebuffer.h" #include "program/prog_parameter.h" #include "program/program.h" #include "intel_mipmap_tree.h" @@ -462,7 +463,7 @@ static void brw_wm_populate_key( struct brw_context *brw, GLuint lookup = 0; GLuint line_aa; bool program_uses_dfdy = fp->program.UsesDFdy; - bool multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; + const bool multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; memset(key, 0, sizeof(*key)); @@ -561,7 +562,7 @@ static void brw_wm_populate_key( struct brw_context *brw, * drawable height in order to invert the Y axis. */ if (fp->program.Base.InputsRead & VARYING_BIT_POS) { - key->drawable_height = ctx->DrawBuffer->Height; + key->drawable_height = _mesa_geometric_height(ctx->DrawBuffer); } if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { @@ -580,7 +581,7 @@ static void brw_wm_populate_key( struct brw_context *brw, key->persample_shading = _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; if (key->persample_shading) - key->persample_2x = ctx->DrawBuffer->Visual.samples == 2; + key->persample_2x = _mesa_geometric_samples(ctx->DrawBuffer) == 2; key->compute_pos_offset = _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 160dd2f6c62..72aad96bb6a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -35,6 +35,7 @@ #include "main/mtypes.h" #include "main/samplerobj.h" #include "program/prog_parameter.h" +#include "main/framebuffer.h" #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" @@ -738,6 +739,9 @@ 
brw_update_renderbuffer_surfaces(struct brw_context *brw, uint32_t *surf_offset) { GLuint i; + const unsigned int w = _mesa_geometric_width(fb); + const unsigned int h = _mesa_geometric_height(fb); + const unsigned int s = _mesa_geometric_samples(fb); /* Update surfaces for drawing buffers */ if (fb->_NumColorDrawBuffers >= 1) { @@ -748,17 +752,15 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw, surf_offset[surf_index] = brw->vtbl.update_renderbuffer_surface( brw, fb->_ColorDrawBuffers[i], - fb->MaxNumLayers > 0, i, surf_index); + _mesa_geometric_layers(fb) > 0, i, surf_index); } else { - brw->vtbl.emit_null_surface_state( - brw, fb->Width, fb->Height, fb->Visual.samples, + brw->vtbl.emit_null_surface_state(brw, w, h, s, &surf_offset[surf_index]); } } } else { const uint32_t surf_index = render_target_start; - brw->vtbl.emit_null_surface_state( - brw, fb->Width, fb->Height, fb->Visual.samples, + brw->vtbl.emit_null_surface_state(brw, w, h, s, &surf_offset[surf_index]); } } diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index aaf90df2b9c..9a29366f0e0 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -31,6 +31,7 @@ #include "brw_util.h" #include "intel_batchbuffer.h" #include "main/fbobject.h" +#include "main/framebuffer.h" static void upload_clip_state(struct brw_context *brw) @@ -145,11 +146,14 @@ upload_clip_state(struct brw_context *brw) * the viewport, so we can ignore this restriction. 
*/ if (brw->gen < 8) { + const float fb_width = (float)_mesa_geometric_width(fb); + const float fb_height = (float)_mesa_geometric_height(fb); + for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { if (ctx->ViewportArray[i].X != 0 || ctx->ViewportArray[i].Y != 0 || - ctx->ViewportArray[i].Width != (float) fb->Width || - ctx->ViewportArray[i].Height != (float) fb->Height) { + ctx->ViewportArray[i].Width != fb_width || + ctx->ViewportArray[i].Height != fb_height) { dw2 &= ~GEN6_CLIP_GB_TEST; break; } @@ -179,7 +183,7 @@ upload_clip_state(struct brw_context *brw) dw2); OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | - (fb->MaxNumLayers > 0 ? 0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) | + (_mesa_geometric_layers(fb) > 0 ? 0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) | ((ctx->Const.MaxViewports - 1) & GEN6_CLIP_MAX_VP_INDEX_MASK)); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c index ec46479ff75..36734f598fe 100644 --- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c +++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c @@ -26,6 +26,7 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_multisample_state.h" +#include "main/framebuffer.h" void gen6_get_sample_position(struct gl_context *ctx, @@ -34,7 +35,7 @@ gen6_get_sample_position(struct gl_context *ctx, { uint8_t bits; - switch (fb->Visual.samples) { + switch (_mesa_geometric_samples(fb)) { case 1: result[0] = result[1] = 0.5f; return; diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index 0111f152ef6..17b4a7fba96 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -39,6 +39,8 @@ gen6_upload_scissor_state(struct brw_context *brw) const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); struct gen6_scissor_rect *scissor; 
uint32_t scissor_state_offset; + const unsigned int fb_width= _mesa_geometric_width(ctx->DrawBuffer); + const unsigned int fb_height = _mesa_geometric_height(ctx->DrawBuffer); scissor = brw_state_batch(brw, AUB_TRACE_SCISSOR_STATE, sizeof(*scissor) * ctx->Const.MaxViewports, 32, @@ -56,7 +58,11 @@ gen6_upload_scissor_state(struct brw_context *brw) for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { int bbox[4]; - _mesa_scissor_bounding_box(ctx, ctx->DrawBuffer, i, bbox); + bbox[0] = 0; + bbox[1] = fb_width; + bbox[2] = 0; + bbox[3] = fb_height; + _mesa_intersect_scissor_bounding_box(ctx, i, bbox); if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) { /* If the scissor was out of bounds and got clamped to 0 width/height @@ -80,8 +86,8 @@ gen6_upload_scissor_state(struct brw_context *brw) /* memory: Y=0=top */ scissor[i].xmin = bbox[0]; scissor[i].xmax = bbox[1] - 1; - scissor[i].ymin = ctx->DrawBuffer->Height - bbox[3]; - scissor[i].ymax = ctx->DrawBuffer->Height - bbox[2] - 1; + scissor[i].ymin = fb_height - bbox[3]; + scissor[i].ymax = fb_height - bbox[2] - 1; } } BEGIN_BATCH(2); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 5809628e021..b00517ed81e 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -31,6 +31,7 @@ #include "brw_util.h" #include "main/macros.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "intel_batchbuffer.h" /** @@ -273,7 +274,7 @@ upload_sf_state(struct brw_context *brw) int i; /* _NEW_BUFFER */ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); - bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; + const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; float point_size; diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index 2fb0182c56e..7c8d8849f4e 
100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -30,6 +30,7 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/viewport.h" /* The clip VP defines the guardband region where expensive clipping is skipped @@ -93,10 +94,10 @@ gen6_upload_sf_vp(struct brw_context *brw) /* _NEW_BUFFERS */ if (render_to_fbo) { y_scale = 1.0; - y_bias = 0; + y_bias = 0.0; } else { y_scale = -1.0; - y_bias = ctx->DrawBuffer->Height; + y_bias = (float)_mesa_geometric_height(ctx->DrawBuffer); } for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 7081eb73428..d1748ba7457 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -33,6 +33,7 @@ #include "program/program.h" #include "program/prog_parameter.h" #include "program/prog_statevars.h" +#include "main/framebuffer.h" #include "intel_batchbuffer.h" static void @@ -284,7 +285,7 @@ upload_wm_state(struct brw_context *brw) const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; /* _NEW_BUFFERS */ - const bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; + const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16 * should be enabled. 
We do 'SIMD16 only' dispatch if a SIMD16 shader diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index a20967caf5c..4fa46a8eb97 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -27,6 +27,7 @@ #include "brw_util.h" #include "main/macros.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "intel_batchbuffer.h" static void @@ -109,7 +110,7 @@ upload_sf_state(struct brw_context *brw) float point_size; /* _NEW_BUFFERS */ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); - bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; + const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; dw1 = GEN6_SF_STATISTICS_ENABLE; diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c index eb596845b72..b655205ec35 100644 --- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c @@ -26,6 +26,7 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/viewport.h" static void @@ -45,10 +46,10 @@ gen7_upload_sf_clip_viewport(struct brw_context *brw) /* _NEW_BUFFERS */ if (render_to_fbo) { y_scale = 1.0; - y_bias = 0; + y_bias = 0.0; } else { y_scale = -1.0; - y_bias = ctx->DrawBuffer->Height; + y_bias = (float)_mesa_geometric_height(ctx->DrawBuffer); } for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index b9182758852..1c470769c7f 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -30,6 +30,7 @@ #include "program/program.h" #include "program/prog_parameter.h" #include "program/prog_statevars.h" +#include "main/framebuffer.h" #include "intel_batchbuffer.h" static void @@ -45,7 +46,7 @@ 
upload_wm_state(struct brw_context *brw) uint32_t dw1, dw2; /* _NEW_BUFFERS */ - bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; + const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; dw1 = dw2 = 0; dw1 |= GEN7_WM_STATISTICS_ENABLE; diff --git a/src/mesa/drivers/dri/i965/gen8_viewport_state.c b/src/mesa/drivers/dri/i965/gen8_viewport_state.c index 322e4663b99..2d8eeb1f10f 100644 --- a/src/mesa/drivers/dri/i965/gen8_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen8_viewport_state.c @@ -26,6 +26,7 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/viewport.h" static void @@ -33,6 +34,7 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; float y_scale, y_bias; + const float fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer); const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); float *vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, @@ -47,7 +49,7 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw) y_bias = 0; } else { y_scale = -1.0; - y_bias = ctx->DrawBuffer->Height; + y_bias = fb_height; } for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { @@ -116,8 +118,8 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw) } else { vp[12] = ctx->ViewportArray[i].X; vp[13] = viewport_Xmax - 1; - vp[14] = ctx->DrawBuffer->Height - viewport_Ymax; - vp[15] = ctx->DrawBuffer->Height - ctx->ViewportArray[i].Y - 1; + vp[14] = fb_height - viewport_Ymax; + vp[15] = fb_height - ctx->ViewportArray[i].Y - 1; } vp += 16; From bbb700967e9991a03ed6e8073c9bdc2ca0d1381d Mon Sep 17 00:00:00 2001 From: Kevin Rogovin Date: Wed, 17 Jun 2015 13:29:56 +0300 Subject: [PATCH 691/834] mesa: function for testing if current frag-shader has atomics Add helper function that checks if current fragment shader active of gl_context has atomic buffer access. 
Reviewed-by: Ian Romanick Signed-off-by: Kevin Rogovin --- src/mesa/main/mtypes.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index a10e49434bc..ffa7f0cd704 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -4448,7 +4448,12 @@ enum _debug DEBUG_INCOMPLETE_FBO = (1 << 3) }; - +static inline bool +_mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx) +{ + return ctx->Shader._CurrentFragmentProgram != NULL && + ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0; +} #ifdef __cplusplus } From 9ded6369754910f7f58f896c1627ba0bbfb0f864 Mon Sep 17 00:00:00 2001 From: Kevin Rogovin Date: Wed, 17 Jun 2015 13:29:57 +0300 Subject: [PATCH 692/834] i965: execution of frag-shader when it has atomic buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure that the GPU spawns the fragment shader thread for those fragment shaders with atomic buffer access. 
Reviewed-by: Tapani Pälli Signed-off-by: Kevin Rogovin --- src/mesa/drivers/dri/i965/gen7_wm_state.c | 4 ++++ src/mesa/drivers/dri/i965/gen8_ps_state.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 1c470769c7f..ea11ae845e3 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -77,6 +77,10 @@ upload_wm_state(struct brw_context *brw) dw1 |= GEN7_WM_KILL_ENABLE; } + if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx)) { + dw1 |= GEN7_WM_DISPATCH_ENABLE; + } + /* _NEW_BUFFERS | _NEW_COLOR */ if (brw_color_buffer_write_enabled(brw) || writes_depth || dw1 & GEN7_WM_KILL_ENABLE) { diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 6b9489bf7f6..a88f109c691 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -58,6 +58,9 @@ gen8_upload_ps_extra(struct brw_context *brw, if (prog_data->uses_omask) dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET; + if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx)) + dw1 |= GEN8_PSX_SHADER_HAS_UAV; + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); OUT_BATCH(dw1); From 83199998310591b9162ab12e922ed79ee235b5c8 Mon Sep 17 00:00:00 2001 From: Kevin Rogovin Date: Wed, 17 Jun 2015 13:29:58 +0300 Subject: [PATCH 693/834] i965: enable ARB_framebuffer_no_attachments for Gen7+ Enable GL_ARB_framebuffer_no_attachments in i965 for Gen7 and higher. 
Reviewed-by: Ian Romanick Signed-off-by: Kevin Rogovin --- src/mesa/drivers/dri/i965/brw_context.c | 6 ++++++ src/mesa/drivers/dri/i965/intel_extensions.c | 1 + 2 files changed, 7 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index f39b3501539..c629f39bb2a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -614,6 +614,12 @@ brw_initialize_context_constants(struct brw_context *brw) /* ARB_gpu_shader5 */ if (brw->gen >= 7) ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS); + + /* ARB_framebuffer_no_attachments */ + ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth; + ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight; + ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers; + ctx->Const.MaxFramebufferSamples = max_samples; } static void diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index cafb77455d7..365b4b8f718 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -325,6 +325,7 @@ intelInitExtensions(struct gl_context *ctx) if (brw->gen >= 7) { ctx->Extensions.ARB_conservative_depth = true; ctx->Extensions.ARB_derivative_control = true; + ctx->Extensions.ARB_framebuffer_no_attachments = true; ctx->Extensions.ARB_gpu_shader5 = true; ctx->Extensions.ARB_shader_atomic_counters = true; ctx->Extensions.ARB_texture_compression_bptc = true; From ff06901082b84c91ee64d3a54bf372f0c809f4bf Mon Sep 17 00:00:00 2001 From: Kevin Rogovin Date: Wed, 17 Jun 2015 13:29:59 +0300 Subject: [PATCH 694/834] docs: mark GL_ARB_framebuffer_no_attachments done for i965 Mark GL_ARB_framebuffer_no_attachments as done for i965. 
Reviewed-by: Ian Romanick Signed-off-by: Kevin Rogovin --- docs/GL3.txt | 4 ++-- docs/relnotes/10.7.0.html | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index f2d06f17360..a40fdee487a 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -157,7 +157,7 @@ GL 4.3, GLSL 4.30: GL_KHR_debug DONE (all drivers) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, llvmpipe) - GL_ARB_framebuffer_no_attachments not started + GL_ARB_framebuffer_no_attachments DONE (i965) GL_ARB_internalformat_query2 not started GL_ARB_invalidate_subdata DONE (all drivers) GL_ARB_multi_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) @@ -216,7 +216,7 @@ GLES3.1, GLSL ES 3.1 GL_ARB_compute_shader in progress (jljusten) GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) - GL_ARB_framebuffer_no_attachments not started + GL_ARB_framebuffer_no_attachments DONE (i965) GL_ARB_program_interface_query DONE (all drivers) GL_ARB_shader_atomic_counters DONE (i965) GL_ARB_shader_image_load_store in progress (curro) diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html index 7518389ba4b..e089889667d 100644 --- a/docs/relnotes/10.7.0.html +++ b/docs/relnotes/10.7.0.html @@ -44,6 +44,7 @@ Note: some of the new features are only available with certain drivers.

          +
        • GL_ARB_framebuffer_no_attachments on i965
        • GL_ARB_shader_stencil_export on llvmpipe
        From 36e3eb6a957f8f20ed187ec88a067fc65cb81432 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 17 Jun 2015 22:18:09 -0400 Subject: [PATCH 695/834] nvc0/ir: can't have a join on a load with an indirect source Triggers an INVALID_OPCODE warning on GK208. Seems rare enough to not warrant verification on other chips. Fixes the new piglits: ubo_array_indexing/fs-nonuniform-control-flow.shader_test ubo_array_indexing/vs-nonuniform-control-flow.shader_test Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index b7fcd56724d..ae739eeda83 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2288,7 +2288,7 @@ FlatteningPass::visit(BasicBlock *bb) insn->op != OP_LINTERP && // probably just nve4 insn->op != OP_PINTERP && // probably just nve4 ((insn->op != OP_LOAD && insn->op != OP_STORE) || - typeSizeof(insn->dType) <= 4) && + (typeSizeof(insn->dType) <= 4 && !insn->src(0).isIndirect(0))) && !insn->isNop()) { insn->join = 1; bb->remove(bb->getExit()); From 2b1cdb0eddb73f62e4848d4b64840067f1f70865 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 24 Feb 2015 19:02:50 +0100 Subject: [PATCH 696/834] i965: Fix textureGrad with cube samplers We can't use sampler messages with gradient information (like sample_g or sample_d) to deal with this scenario because according to the PRM: "The r coordinate and its gradients are required only for surface types that use the third coordinate. Usage of this message type on cube surfaces assumes that the u, v, and gradients have already been transformed onto the appropriate face, but still in [-1,+1] range. The r coordinate contains the faceid, and the r gradients are ignored by hardware." 
Instead, we should lower this to compute the LOD manually based on the gradients and use a different sample message that takes the computed LOD instead of the gradients. This is already being done in brw_lower_texture_gradients.cpp, but it is restricted to shadow samplers only, although there is a comment stating that we should probably do this also for samplerCube and samplerCubeArray. Because of this, both dEQP and Piglit test cases for textureGrad with cube maps currently fail. This patch does two things: 1) Activates the textureGrad lowering pass for all cube samplers. 2) Corrects the computation of the LOD value for cube samplers. I had to do 2) because for cube maps the calculations implemented in the lowering pass always compute a value of rho that is twice the value we want (so we get a LOD value one unit larger than we want). This only happens for cube map samplers (all kinds). I am not sure why we need to do this, but I suspect that it is related to the fact that cube map coordinates, when mapped onto a specific face of the cube, are in the range [-1, 1] instead of [0, 1], so we probably need to divide the derivatives by 2 when we compute the LOD. Doing that would produce the same result as dividing the final rho computation by 2 (or subtracting one from the computed LOD, which is what we are doing here).
Fixes the following piglit tests: bin/tex-miplevel-selection textureGrad Cube -auto -fbo bin/tex-miplevel-selection textureGrad CubeArray -auto -fbo bin/tex-miplevel-selection textureGrad CubeShadow -auto -fbo Fixes 10 dEQP tests in the following category: dEQP-GLES3.functional.shaders.texture_functions.texturegrad.*cube* Reviewed-by: Ben Widawsky --- .../dri/i965/brw_lower_texture_gradients.cpp | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp index 0424003ffd5..7a5f9834423 100644 --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp @@ -89,19 +89,18 @@ txs_type(const glsl_type *type) ir_visitor_status lower_texture_grad_visitor::visit_leave(ir_texture *ir) { - /* Only lower textureGrad with shadow samplers */ - if (ir->op != ir_txd || !ir->shadow_comparitor) + /* Only lower textureGrad with cube maps or shadow samplers */ + if (ir->op != ir_txd || + (ir->sampler->type->sampler_dimensionality != GLSL_SAMPLER_DIM_CUBE && + !ir->shadow_comparitor)) return visit_continue; - /* Lower textureGrad() with samplerCubeShadow even if we have the sample_d_c + /* Lower textureGrad() with samplerCube* even if we have the sample_d_c * message. GLSL provides gradients for the 'r' coordinate. Unfortunately: * * From the Ivybridge PRM, Volume 4, Part 1, sample_d message description: * "The r coordinate contains the faceid, and the r gradients are ignored * by hardware." - * - * We likely need to do a similar treatment for samplerCube and - * samplerCubeArray, but we have insufficient testing for that at the moment. 
*/ bool need_lowering = !has_sample_d_c || ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE; @@ -155,9 +154,20 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir) expr(ir_unop_sqrt, dot(dPdy, dPdy))); } - /* lambda_base = log2(rho). We're ignoring GL state biases for now. */ + /* lambda_base = log2(rho). We're ignoring GL state biases for now. + * + * For cube maps the result of these formulas is giving us a value of rho + * that is twice the value we should use, so divide it by 2 or, + * alternatively, remove one unit from the result of the log2 computation. + */ ir->op = ir_txl; - ir->lod_info.lod = expr(ir_unop_log2, rho); + if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { + ir->lod_info.lod = expr(ir_binop_add, + expr(ir_unop_log2, rho), + new(mem_ctx) ir_constant(-1.0f)); + } else { + ir->lod_info.lod = expr(ir_unop_log2, rho); + } progress = true; return visit_continue; From 62d153ea37b1bf572c39aab8ec46099fc903362d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 17 Jun 2015 22:56:15 -0700 Subject: [PATCH 697/834] vc4: Track the number of BOs allocated and their size. This is useful for BO leak debugging. 
--- src/gallium/drivers/vc4/vc4_bufmgr.c | 101 +++++++++++++++++++++++++-- src/gallium/drivers/vc4/vc4_screen.h | 6 ++ 2 files changed, 100 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index 69a7584e14b..eef7e9dc16f 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -34,6 +34,47 @@ #include "vc4_context.h" #include "vc4_screen.h" +static bool dump_stats = false; + +static void +vc4_bo_dump_stats(struct vc4_screen *screen) +{ + struct vc4_bo_cache *cache = &screen->bo_cache; + + fprintf(stderr, " BOs allocated: %d\n", screen->bo_count); + fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 1024); + fprintf(stderr, " BOs cached: %d\n", cache->bo_count); + fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 1024); + + if (!list_empty(&cache->time_list)) { + struct vc4_bo *first = LIST_ENTRY(struct vc4_bo, + cache->time_list.next, + time_list); + struct vc4_bo *last = LIST_ENTRY(struct vc4_bo, + cache->time_list.prev, + time_list); + + fprintf(stderr, " oldest cache time: %ld\n", + (long)first->free_time); + fprintf(stderr, " newest cache time: %ld\n", + (long)last->free_time); + + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + fprintf(stderr, " now: %ld\n", + (long)time.tv_sec); + } +} + +static void +vc4_bo_remove_from_cache(struct vc4_bo_cache *cache, struct vc4_bo *bo) +{ + list_del(&bo->time_list); + list_del(&bo->size_list); + cache->bo_count--; + cache->bo_size -= bo->size; +} + static struct vc4_bo * vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name) { @@ -60,8 +101,7 @@ vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name) } pipe_reference_init(&bo->reference, 1); - list_del(&bo->time_list); - list_del(&bo->size_list); + vc4_bo_remove_from_cache(cache, bo); bo->name = name; } @@ -78,8 +118,14 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name) size
= align(size, 4096); bo = vc4_bo_from_cache(screen, size, name); - if (bo) + if (bo) { + if (dump_stats) { + fprintf(stderr, "Allocated %s %dkb from cache:\n", + name, size / 1024); + vc4_bo_dump_stats(screen); + } return bo; + } bo = CALLOC_STRUCT(vc4_bo); if (!bo) @@ -116,6 +162,13 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name) abort(); } + screen->bo_count++; + screen->bo_size += bo->size; + if (dump_stats) { + fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024); + vc4_bo_dump_stats(screen); + } + return bo; } @@ -153,6 +206,17 @@ vc4_bo_free(struct vc4_bo *bo) if (ret != 0) fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno)); + screen->bo_count--; + screen->bo_size -= bo->size; + + if (dump_stats) { + fprintf(stderr, "Freed %s%s%dkb:\n", + bo->name ? bo->name : "", + bo->name ? " " : "", + bo->size / 1024); + vc4_bo_dump_stats(screen); + } + free(bo); } @@ -160,18 +224,29 @@ static void free_stale_bos(struct vc4_screen *screen, time_t time) { struct vc4_bo_cache *cache = &screen->bo_cache; + bool freed_any = false; list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list, time_list) { + if (dump_stats && !freed_any) { + fprintf(stderr, "Freeing stale BOs:\n"); + vc4_bo_dump_stats(screen); + freed_any = true; + } + /* If it's more than a second old, free it. 
*/ if (time - bo->free_time > 2) { - list_del(&bo->time_list); - list_del(&bo->size_list); + vc4_bo_remove_from_cache(cache, bo); vc4_bo_free(bo); } else { break; } } + + if (dump_stats && freed_any) { + fprintf(stderr, "Freed stale BOs:\n"); + vc4_bo_dump_stats(screen); + } } void @@ -214,6 +289,14 @@ vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time) bo->free_time = time; list_addtail(&bo->size_list, &cache->size_list[page_index]); list_addtail(&bo->time_list, &cache->time_list); + cache->bo_count++; + cache->bo_size += bo->size; + if (dump_stats) { + fprintf(stderr, "Freed %s %dkb to cache:\n", + bo->name, bo->size / 1024); + vc4_bo_dump_stats(screen); + } + bo->name = NULL; free_stale_bos(screen, time); } @@ -450,8 +533,12 @@ vc4_bufmgr_destroy(struct pipe_screen *pscreen) list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list, time_list) { - list_del(&bo->time_list); - list_del(&bo->size_list); + vc4_bo_remove_from_cache(cache, bo); vc4_bo_free(bo); } + + if (dump_stats) { + fprintf(stderr, "BO stats after screen destroy:\n"); + vc4_bo_dump_stats(screen); + } } diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h index fb08cc1c7a0..5992e371093 100644 --- a/src/gallium/drivers/vc4/vc4_screen.h +++ b/src/gallium/drivers/vc4/vc4_screen.h @@ -67,7 +67,13 @@ struct vc4_screen { uint32_t size_list_size; pipe_mutex lock; + + uint32_t bo_size; + uint32_t bo_count; } bo_cache; + + uint32_t bo_size; + uint32_t bo_count; }; static inline struct vc4_screen * From dc1fbad2eb5454ed36a066d2a69b575cd5a8abaf Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 17 Jun 2015 23:49:19 -0700 Subject: [PATCH 698/834] vc4: Fix memory leak from simple_list conversion. I accidentally shadowed the outside declaration, so we always returned NULL even when we'd found something in the cache. 
--- src/gallium/drivers/vc4/vc4_bufmgr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index eef7e9dc16f..cbdb9e89cf6 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -87,9 +87,8 @@ vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name) struct vc4_bo *bo = NULL; pipe_mutex_lock(cache->lock); if (!list_empty(&cache->size_list[page_index])) { - struct vc4_bo *bo = LIST_ENTRY(struct vc4_bo, - cache->size_list[page_index].next, - size_list); + bo = LIST_ENTRY(struct vc4_bo, cache->size_list[page_index].next, + size_list); /* Check that the BO has gone idle. If not, then we want to * allocate something new instead, since we assume that the From 91c73a9a280b749a781cd3f071fc377fcb9758e1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 17 Jun 2015 13:51:55 -0700 Subject: [PATCH 699/834] vc4: Add dumping of VC4_PACKET_TILE_BINNING_MODE_CONFIG. 
--- src/gallium/drivers/vc4/vc4_cl_dump.c | 33 ++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c index 14239840d32..69055081daa 100644 --- a/src/gallium/drivers/vc4/vc4_cl_dump.c +++ b/src/gallium/drivers/vc4/vc4_cl_dump.c @@ -173,6 +173,37 @@ dump_VC4_PACKET_CLIPPER_Z_SCALING(void *cl, uint32_t offset, uint32_t hw_offset) scale[0], scale[1]); } +static void +dump_VC4_PACKET_TILE_BINNING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t hw_offset) +{ + uint32_t *tile_alloc_addr = cl + offset; + uint32_t *tile_alloc_size = cl + offset + 4; + uint32_t *tile_state_addr = cl + offset + 8; + uint8_t *bin_x = cl + offset + 12; + uint8_t *bin_y = cl + offset + 13; + uint8_t *flags = cl + offset + 14; + + fprintf(stderr, "0x%08x 0x%08x: tile alloc addr 0x%08x\n", + offset, hw_offset, + *tile_alloc_addr); + + fprintf(stderr, "0x%08x 0x%08x: tile alloc size %db\n", + offset + 4, hw_offset + 4, + *tile_alloc_size); + + fprintf(stderr, "0x%08x 0x%08x: tile state addr 0x%08x\n", + offset + 8, hw_offset + 8, + *tile_state_addr); + + fprintf(stderr, "0x%08x 0x%08x: tiles (%d, %d)\n", + offset + 12, hw_offset + 12, + *bin_x, *bin_y); + + fprintf(stderr, "0x%08x 0x%08x: flags 0x%02x\n", + offset + 14, hw_offset + 14, + *flags); +} + static void dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t hw_offset) { @@ -311,7 +342,7 @@ static const struct packet_info { PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING, 9), PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING, 9), - PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 16), + PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 16), PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 11), PACKET(VC4_PACKET_CLEAR_COLORS, 14), PACKET_DUMP(VC4_PACKET_TILE_COORDINATES, 3), From 9adcd2d80aceec90b9c3712b53d8e7839dc5634b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Jun 2015 12:36:47 -0700 Subject: [PATCH 700/834] 
vc4: Move RCL generation into the kernel. There weren't that many variations of RCL generation, and this lets us skip all the in-kernel validation for what we generated. --- src/gallium/drivers/vc4/Makefile.sources | 1 + src/gallium/drivers/vc4/kernel/vc4_drv.h | 28 +- src/gallium/drivers/vc4/kernel/vc4_gem.c | 70 +-- .../drivers/vc4/kernel/vc4_render_cl.c | 446 ++++++++++++++++++ src/gallium/drivers/vc4/kernel/vc4_validate.c | 302 ++---------- src/gallium/drivers/vc4/vc4_blit.c | 107 +---- src/gallium/drivers/vc4/vc4_context.c | 289 ++---------- src/gallium/drivers/vc4/vc4_context.h | 15 +- src/gallium/drivers/vc4/vc4_draw.c | 2 + src/gallium/drivers/vc4/vc4_drm.h | 40 +- src/gallium/drivers/vc4/vc4_job.c | 97 +++- 11 files changed, 723 insertions(+), 674 deletions(-) create mode 100644 src/gallium/drivers/vc4/kernel/vc4_render_cl.c diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index edef49353a2..1eb029e67e7 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -2,6 +2,7 @@ C_SOURCES := \ kernel/vc4_drv.h \ kernel/vc4_gem.c \ kernel/vc4_packet.h \ + kernel/vc4_render_cl.c \ kernel/vc4_validate.c \ kernel/vc4_validate_shaders.c \ vc4_blit.c \ diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h index 8e9230b8949..83802dd774a 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_drv.h +++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h @@ -88,14 +88,9 @@ struct vc4_exec_info { uint32_t shader_state_count; bool found_tile_binning_mode_config_packet; - bool found_tile_rendering_mode_config_packet; bool found_start_tile_binning_packet; bool found_increment_semaphore_packet; - bool found_wait_on_semaphore_packet; uint8_t bin_tiles_x, bin_tiles_y; - uint32_t fb_width, fb_height; - uint32_t tile_alloc_init_block_mask; - uint32_t tile_alloc_init_block_last; struct drm_gem_cma_object *tile_alloc_bo; /** @@ -163,13 +158,10 @@ struct 
vc4_validated_shader_info /* vc4_validate.c */ int -vc4_validate_cl(struct drm_device *dev, - void *validated, - void *unvalidated, - uint32_t len, - bool is_bin, - bool has_bin, - struct vc4_exec_info *exec); +vc4_validate_bin_cl(struct drm_device *dev, + void *validated, + void *unvalidated, + struct vc4_exec_info *exec); int vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec); @@ -177,4 +169,16 @@ vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec); struct vc4_validated_shader_info * vc4_validate_shader(struct drm_gem_cma_object *shader_obj); +bool vc4_use_bo(struct vc4_exec_info *exec, + uint32_t hindex, + enum vc4_bo_mode mode, + struct drm_gem_cma_object **obj); + +int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec); + +bool vc4_check_tex_size(struct vc4_exec_info *exec, + struct drm_gem_cma_object *fbo, + uint32_t offset, uint8_t tiling_format, + uint32_t width, uint32_t height, uint8_t cpp); + #endif /* VC4_DRV_H */ diff --git a/src/gallium/drivers/vc4/kernel/vc4_gem.c b/src/gallium/drivers/vc4/kernel/vc4_gem.c index e559ddd1d4e..e4b7fea5968 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_gem.c +++ b/src/gallium/drivers/vc4/kernel/vc4_gem.c @@ -25,24 +25,26 @@ #include "vc4_drv.h" -int -vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) +/* + * Copies in the user's binning command list and generates the validated bin + * CL, along with associated data (shader records, uniforms). 
+ */ +static int +vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) { struct drm_vc4_submit_cl *args = exec->args; void *temp = NULL; - void *bin, *render; + void *bin; int ret = 0; uint32_t bin_offset = 0; - uint32_t render_offset = bin_offset + args->bin_cl_size; - uint32_t shader_rec_offset = roundup(render_offset + - args->render_cl_size, 16); + uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size, + 16); uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size; uint32_t exec_size = uniforms_offset + args->uniforms_size; uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) * args->shader_rec_count); - if (shader_rec_offset < render_offset || - uniforms_offset < shader_rec_offset || + if (uniforms_offset < shader_rec_offset || exec_size < uniforms_offset || args->shader_rec_count >= (UINT_MAX / sizeof(struct vc4_shader_state)) || @@ -66,7 +68,6 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) goto fail; } bin = temp + bin_offset; - render = temp + render_offset; exec->shader_rec_u = temp + shader_rec_offset; exec->uniforms_u = temp + uniforms_offset; exec->shader_state = temp + exec_size; @@ -80,14 +81,6 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) goto fail; } - ret = copy_from_user(render, - (void __user *)(uintptr_t)args->render_cl, - args->render_cl_size); - if (ret) { - DRM_ERROR("Failed to copy in render cl\n"); - goto fail; - } - ret = copy_from_user(exec->shader_rec_u, (void __user *)(uintptr_t)args->shader_rec, args->shader_rec_size); @@ -118,7 +111,6 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) &exec->unref_list); exec->ct0ca = exec->exec_bo->paddr + bin_offset; - exec->ct1ca = exec->exec_bo->paddr + render_offset; exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset; exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset; @@ -128,23 +120,10 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) 
exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset; exec->uniforms_size = args->uniforms_size; - ret = vc4_validate_cl(dev, - exec->exec_bo->vaddr + bin_offset, - bin, - args->bin_cl_size, - true, - args->bin_cl_size != 0, - exec); - if (ret) - goto fail; - - ret = vc4_validate_cl(dev, - exec->exec_bo->vaddr + render_offset, - render, - args->render_cl_size, - false, - args->bin_cl_size != 0, - exec); + ret = vc4_validate_bin_cl(dev, + exec->exec_bo->vaddr + bin_offset, + bin, + exec); if (ret) goto fail; @@ -155,4 +134,25 @@ fail: return ret; } +int +vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) +{ + int ret = 0; + + if (exec->args->bin_cl_size != 0) { + ret = vc4_get_bcl(dev, exec); + if (ret) + goto fail; + } else { + exec->ct0ca = exec->ct0ea = 0; + } + + ret = vc4_get_rcl(dev, exec); + if (ret) + goto fail; + +fail: + return ret; +} + #endif /* USE_VC4_SIMULATOR */ diff --git a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c new file mode 100644 index 00000000000..de6070fec72 --- /dev/null +++ b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c @@ -0,0 +1,446 @@ +/* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * DOC: Render command list generation + * + * In the VC4 driver, render command list generation is performed by the + * kernel instead of userspace. We do this because validating a + * user-submitted command list is hard to get right and has high CPU overhead, + * while the number of valid configurations for render command lists is + * actually fairly low. + */ + +#include "vc4_drv.h" +#include "vc4_packet.h" + +struct vc4_rcl_setup { + struct drm_gem_cma_object *color_read; + struct drm_gem_cma_object *color_ms_write; + struct drm_gem_cma_object *zs_read; + struct drm_gem_cma_object *zs_write; + + struct drm_gem_cma_object *rcl; + u32 next_offset; +}; + +static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val) +{ + *(u8 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 1; +} + +static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val) +{ + *(u16 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 2; +} + +static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val) +{ + *(u32 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 4; +} + + +/* + * Emits a no-op STORE_TILE_BUFFER_GENERAL. + * + * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of + * some sort before another load is triggered. 
+ */ +static void vc4_store_before_load(struct vc4_rcl_setup *setup) +{ + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR); + rcl_u32(setup, 0); /* no address, since we're in None mode */ +} + +/* + * Emits a PACKET_TILE_COORDINATES if one isn't already pending. + * + * The tile coordinates packet triggers a pending load if there is one, are + * used for clipping during rendering, and determine where loads/stores happen + * relative to their base address. + */ +static void vc4_tile_coordinates(struct vc4_rcl_setup *setup, + uint32_t x, uint32_t y) +{ + rcl_u8(setup, VC4_PACKET_TILE_COORDINATES); + rcl_u8(setup, x); + rcl_u8(setup, y); +} + +static void emit_tile(struct vc4_exec_info *exec, + struct vc4_rcl_setup *setup, + uint8_t x, uint8_t y, bool first, bool last) +{ + bool has_bin = exec->args->bin_cl_size != 0; + + /* Note that the load doesn't actually occur until the + * tile coords packet is processed, and only one load + * may be outstanding at a time. + */ + if (setup->color_read) { + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + rcl_u16(setup, exec->args->color_read.bits); + rcl_u32(setup, + setup->color_read->paddr + + exec->args->color_read.offset); + } + + if (setup->zs_read) { + if (setup->color_read) { + /* Exec previous load. */ + vc4_tile_coordinates(setup, x, y); + vc4_store_before_load(setup); + } + + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + rcl_u16(setup, exec->args->zs_read.bits); + rcl_u32(setup, + setup->zs_read->paddr + exec->args->zs_read.offset); + } + + /* Clipping depends on tile coordinates having been + * emitted, so we always need one here. + */ + vc4_tile_coordinates(setup, x, y); + + /* Wait for the binner before jumping to the first + * tile's lists. 
+ */ + if (first && has_bin) + rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE); + + if (has_bin) { + rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST); + rcl_u32(setup, (exec->tile_alloc_bo->paddr + + (y * exec->bin_tiles_x + x) * 32)); + } + + if (setup->zs_write) { + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, exec->args->zs_write.bits | + (setup->color_ms_write ? + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0)); + rcl_u32(setup, + (setup->zs_write->paddr + exec->args->zs_write.offset) | + ((last && !setup->color_ms_write) ? + VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); + } + + if (setup->color_ms_write) { + if (setup->zs_write) { + /* Reset after previous store */ + vc4_tile_coordinates(setup, x, y); + } + + if (last) + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); + else + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER); + } +} + +static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, + struct vc4_rcl_setup *setup) +{ + bool has_bin = exec->args->bin_cl_size != 0; + uint8_t min_x_tile = exec->args->min_x_tile; + uint8_t min_y_tile = exec->args->min_y_tile; + uint8_t max_x_tile = exec->args->max_x_tile; + uint8_t max_y_tile = exec->args->max_y_tile; + uint8_t xtiles = max_x_tile - min_x_tile + 1; + uint8_t ytiles = max_y_tile - min_y_tile + 1; + uint8_t x, y; + uint32_t size, loop_body_size; + + size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; + loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; + + if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { + size += VC4_PACKET_CLEAR_COLORS_SIZE + + VC4_PACKET_TILE_COORDINATES_SIZE + + VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + } + + if (setup->color_read) { + loop_body_size += (VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE); + } + if (setup->zs_read) { + if (setup->color_read) { + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; + loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + } + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; + } + + if 
(has_bin) { + size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE; + loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE; + } + + if (setup->zs_write) + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; + if (setup->color_ms_write) { + if (setup->zs_write) + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; + loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE; + } + size += xtiles * ytiles * loop_body_size; + + setup->rcl = drm_gem_cma_create(dev, size); + if (!setup->rcl) + return -ENOMEM; + list_addtail(&to_vc4_bo(&setup->rcl->base)->unref_head, + &exec->unref_list); + + rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); + rcl_u32(setup, + (setup->color_ms_write ? + (setup->color_ms_write->paddr + + exec->args->color_ms_write.offset) : + 0)); + rcl_u16(setup, exec->args->width); + rcl_u16(setup, exec->args->height); + rcl_u16(setup, exec->args->color_ms_write.bits); + + /* The tile buffer gets cleared when the previous tile is stored. If + * the clear values changed between frames, then the tile buffer has + * stale clear values in it, so we have to do a store in None mode (no + * writes) so that we trigger the tile buffer clear. 
+ */ + if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { + rcl_u8(setup, VC4_PACKET_CLEAR_COLORS); + rcl_u32(setup, exec->args->clear_color[0]); + rcl_u32(setup, exec->args->clear_color[1]); + rcl_u32(setup, exec->args->clear_z); + rcl_u8(setup, exec->args->clear_s); + + vc4_tile_coordinates(setup, 0, 0); + + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE); + rcl_u32(setup, 0); /* no address, since we're in None mode */ + } + + for (y = min_y_tile; y <= max_y_tile; y++) { + for (x = min_x_tile; x <= max_x_tile; x++) { + bool first = (x == min_x_tile && y == min_y_tile); + bool last = (x == max_x_tile && y == max_y_tile); + emit_tile(exec, setup, x, y, first, last); + } + } + + BUG_ON(setup->next_offset != size); + exec->ct1ca = setup->rcl->paddr; + exec->ct1ea = setup->rcl->paddr + setup->next_offset; + + return 0; +} + +static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, + struct drm_gem_cma_object **obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + uint8_t tiling = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_TILING); + uint8_t buffer = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_BUFFER); + uint8_t format = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_FORMAT); + int cpp; + + if (surf->pad != 0) { + DRM_ERROR("Padding unset\n"); + return -EINVAL; + } + + if (surf->hindex == ~0) + return 0; + + if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj)) + return -EINVAL; + + if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK | + VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK | + VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) { + DRM_ERROR("Unknown bits in load/store: 0x%04x\n", + surf->bits); + return -EINVAL; + } + + if (tiling > VC4_TILING_FORMAT_LT) { + DRM_ERROR("Bad tiling format\n"); + return -EINVAL; + } + + if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) { + if (format != 0) { + DRM_ERROR("No color format should be set for ZS\n"); + return -EINVAL; + } + cpp = 4; + } else 
if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) { + switch (format) { + case VC4_LOADSTORE_TILE_BUFFER_BGR565: + case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER: + cpp = 2; + break; + case VC4_LOADSTORE_TILE_BUFFER_RGBA8888: + cpp = 4; + break; + default: + DRM_ERROR("Bad tile buffer format\n"); + return -EINVAL; + } + } else { + DRM_ERROR("Bad load/store buffer %d.\n", buffer); + return -EINVAL; + } + + if (surf->offset & 0xf) { + DRM_ERROR("load/store buffer must be 16b aligned.\n"); + return -EINVAL; + } + + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, + exec->args->width, exec->args->height, cpp)) { + return -EINVAL; + } + + return 0; +} + +static int +vc4_rcl_ms_surface_setup(struct vc4_exec_info *exec, + struct drm_gem_cma_object **obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + uint8_t tiling = VC4_GET_FIELD(surf->bits, + VC4_RENDER_CONFIG_MEMORY_FORMAT); + uint8_t format = VC4_GET_FIELD(surf->bits, + VC4_RENDER_CONFIG_FORMAT); + int cpp; + + if (surf->pad != 0) { + DRM_ERROR("Padding unset\n"); + return -EINVAL; + } + + if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK | + VC4_RENDER_CONFIG_FORMAT_MASK)) { + DRM_ERROR("Unknown bits in render config: 0x%04x\n", + surf->bits); + return -EINVAL; + } + + if (surf->hindex == ~0) + return 0; + + if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj)) + return -EINVAL; + + if (tiling > VC4_TILING_FORMAT_LT) { + DRM_ERROR("Bad tiling format\n"); + return -EINVAL; + } + + switch (format) { + case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED: + case VC4_RENDER_CONFIG_FORMAT_BGR565: + cpp = 2; + break; + case VC4_RENDER_CONFIG_FORMAT_RGBA8888: + cpp = 4; + break; + default: + DRM_ERROR("Bad tile buffer format\n"); + return -EINVAL; + } + + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, + exec->args->width, exec->args->height, cpp)) { + return -EINVAL; + } + + return 0; +} + +int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec) +{ + struct vc4_rcl_setup setup = {0}; + struct 
drm_vc4_submit_cl *args = exec->args; + bool has_bin = args->bin_cl_size != 0; + int ret; + + if (args->min_x_tile > args->max_x_tile || + args->min_y_tile > args->max_y_tile) { + DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n", + args->min_x_tile, args->min_y_tile, + args->max_x_tile, args->max_y_tile); + return -EINVAL; + } + + if (has_bin && + (args->max_x_tile > exec->bin_tiles_x || + args->max_y_tile > exec->bin_tiles_y)) { + DRM_ERROR("Render tiles (%d,%d) outside of bin config (%d,%d)\n", + args->max_x_tile, args->max_y_tile, + exec->bin_tiles_x, exec->bin_tiles_y); + return -EINVAL; + } + + ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read); + if (ret) + return ret; + + ret = vc4_rcl_ms_surface_setup(exec, &setup.color_ms_write, + &args->color_ms_write); + if (ret) + return ret; + + ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read); + if (ret) + return ret; + + ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write); + if (ret) + return ret; + + /* We shouldn't even have the job submitted to us if there's no + * surface to write out. 
+ */ + if (!setup.color_ms_write && !setup.zs_write) { + DRM_ERROR("RCL requires color or Z/S write\n"); + return -EINVAL; + } + + return vc4_create_rcl_bo(dev, exec, &setup); +} diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 0a74a2c6db7..80b0e653d80 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -94,7 +94,7 @@ size_is_lt(uint32_t width, uint32_t height, int cpp) height <= 4 * utile_height(cpp)); } -static bool +bool vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex, enum vc4_bo_mode mode, @@ -147,10 +147,10 @@ gl_shader_rec_size(uint32_t pointer_bits) return 36 + attribute_count * 8; } -static bool -check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, - uint32_t offset, uint8_t tiling_format, - uint32_t width, uint32_t height, uint8_t cpp) +bool +vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, + uint32_t offset, uint8_t tiling_format, + uint32_t width, uint32_t height, uint8_t cpp) { uint32_t aligned_width, aligned_height, stride, size; uint32_t utile_w = utile_width(cpp); @@ -247,118 +247,6 @@ validate_increment_semaphore(VALIDATE_ARGS) return 0; } -static int -validate_wait_on_semaphore(VALIDATE_ARGS) -{ - if (exec->found_wait_on_semaphore_packet) { - DRM_ERROR("Duplicate VC4_PACKET_WAIT_ON_SEMAPHORE\n"); - return -EINVAL; - } - exec->found_wait_on_semaphore_packet = true; - - if (!exec->found_increment_semaphore_packet) { - DRM_ERROR("VC4_PACKET_WAIT_ON_SEMAPHORE without " - "VC4_PACKET_INCREMENT_SEMAPHORE\n"); - return -EINVAL; - } - - return 0; -} - -static int -validate_branch_to_sublist(VALIDATE_ARGS) -{ - uint32_t offset; - - if (!exec->tile_alloc_bo) { - DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST seen before " - "binner setup\n"); - return -EINVAL; - } - - if (!exec->found_wait_on_semaphore_packet) { - DRM_ERROR("Jumping to tile alloc before binning finished.\n"); - 
return -EINVAL; - } - - offset = *(uint32_t *)(untrusted + 0); - if (offset & exec->tile_alloc_init_block_mask || - offset > exec->tile_alloc_init_block_last) { - DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST must jump to initial " - "tile allocation space.\n"); - return -EINVAL; - } - - *(uint32_t *)(validated + 0) = exec->tile_alloc_bo->paddr + offset; - - return 0; -} - -/** - * validate_loadstore_tile_buffer_general() - Validation for - * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL and - * VC4_PACKET_STORE_TILE_BUFFER_GENERAL. - * - * The two packets are nearly the same, except for the TLB-clearing management - * bits not being present for loads. Additionally, while stores are executed - * immediately (using the current tile coordinates), loads are queued to be - * executed when the tile coordinates packet occurs. - * - * Note that coordinates packets are validated to be within the declared - * bin_x/y, which themselves are verified to match the rendering-configuration - * FB width and height (which the hardware uses to clip loads and stores). 
- */ -static int -validate_loadstore_tile_buffer_general(VALIDATE_ARGS) -{ - uint16_t packet_b01 = *(uint16_t *)(untrusted + 0); - struct drm_gem_cma_object *fbo; - uint32_t buffer_type = VC4_GET_FIELD(packet_b01, - VC4_LOADSTORE_TILE_BUFFER_BUFFER); - uint32_t untrusted_address, offset, cpp; - - switch (buffer_type) { - case VC4_LOADSTORE_TILE_BUFFER_NONE: - return 0; - case VC4_LOADSTORE_TILE_BUFFER_COLOR: - if (VC4_GET_FIELD(packet_b01, - VC4_LOADSTORE_TILE_BUFFER_FORMAT) == - VC4_LOADSTORE_TILE_BUFFER_RGBA8888) { - cpp = 4; - } else { - cpp = 2; - } - break; - - case VC4_LOADSTORE_TILE_BUFFER_Z: - case VC4_LOADSTORE_TILE_BUFFER_ZS: - cpp = 4; - break; - - default: - DRM_ERROR("Load/store type %d unsupported\n", buffer_type); - return -EINVAL; - } - - if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &fbo)) - return -EINVAL; - - untrusted_address = *(uint32_t *)(untrusted + 2); - offset = untrusted_address & ~0xf; - - if (!check_tex_size(exec, fbo, offset, - VC4_GET_FIELD(packet_b01, - VC4_LOADSTORE_TILE_BUFFER_TILING), - exec->fb_width, exec->fb_height, cpp)) { - return -EINVAL; - } - - *(uint32_t *)(validated + 2) = (offset + fbo->paddr + - (untrusted_address & 0xf)); - - return 0; -} - static int validate_indexed_prim_list(VALIDATE_ARGS) { @@ -552,9 +440,6 @@ validate_tile_binning_config(VALIDATE_ARGS) tile_allocation_size); return -EINVAL; } - exec->tile_alloc_init_block_mask = tile_alloc_init_block_size - 1; - exec->tile_alloc_init_block_last = tile_alloc_init_block_size * - (exec->bin_tiles_x * exec->bin_tiles_y - 1); if (*(uint32_t *)(untrusted + 8) != 0) { DRM_ERROR("TSDA offset != 0 unsupported\n"); @@ -571,60 +456,6 @@ validate_tile_binning_config(VALIDATE_ARGS) return 0; } -static int -validate_tile_rendering_mode_config(VALIDATE_ARGS) -{ - struct drm_gem_cma_object *fbo; - uint32_t flags, offset, cpp; - - if (exec->found_tile_rendering_mode_config_packet) { - DRM_ERROR("Duplicate VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n"); - return -EINVAL; - } - 
exec->found_tile_rendering_mode_config_packet = true; - - if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &fbo)) - return -EINVAL; - - exec->fb_width = *(uint16_t *)(untrusted + 4); - exec->fb_height = *(uint16_t *)(untrusted + 6); - - flags = *(uint16_t *)(untrusted + 8); - if (VC4_GET_FIELD(flags, VC4_RENDER_CONFIG_FORMAT) == - VC4_RENDER_CONFIG_FORMAT_RGBA8888) { - cpp = 4; - } else { - cpp = 2; - } - - offset = *(uint32_t *)untrusted; - if (!check_tex_size(exec, fbo, offset, - VC4_GET_FIELD(flags, - VC4_RENDER_CONFIG_MEMORY_FORMAT), - exec->fb_width, exec->fb_height, cpp)) { - return -EINVAL; - } - - *(uint32_t *)validated = fbo->paddr + offset; - - return 0; -} - -static int -validate_tile_coordinates(VALIDATE_ARGS) -{ - uint8_t tile_x = *(uint8_t *)(untrusted + 0); - uint8_t tile_y = *(uint8_t *)(untrusted + 1); - - if (tile_x * 64 >= exec->fb_width || tile_y * 64 >= exec->fb_height) { - DRM_ERROR("Tile coordinates %d,%d > render config %dx%d\n", - tile_x, tile_y, exec->fb_width, exec->fb_height); - return -EINVAL; - } - - return 0; -} - static int validate_gem_handles(VALIDATE_ARGS) { @@ -632,81 +463,60 @@ validate_gem_handles(VALIDATE_ARGS) return 0; } -#define VC4_DEFINE_PACKET(packet, bin, render, name, func) \ - [packet] = { bin, render, packet ## _SIZE, name, func } +#define VC4_DEFINE_PACKET(packet, name, func) \ + [packet] = { packet ## _SIZE, name, func } static const struct cmd_info { - bool bin; - bool render; uint16_t len; const char *name; int (*func)(struct vc4_exec_info *exec, void *validated, void *untrusted); } cmd_info[] = { - VC4_DEFINE_PACKET(VC4_PACKET_HALT, 1, 1, "halt", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_NOP, 1, 1, "nop", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, 1, 1, "flush", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, 1, 0, "flush all state", validate_flush_all), - VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, 1, 0, "start tile binning", validate_start_tile_binning), - VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 1, 
0, "increment semaphore", validate_increment_semaphore), - VC4_DEFINE_PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 0, 1, "wait on semaphore", validate_wait_on_semaphore), - /* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but - * we only use it from the render CL in order to jump into the tile - * allocation BO. - */ - VC4_DEFINE_PACKET(VC4_PACKET_BRANCH_TO_SUB_LIST, 0, 1, "branch to sublist", validate_branch_to_sublist), - VC4_DEFINE_PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 0, 1, "store MS resolved tile color buffer", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 0, 1, "store MS resolved tile color buffer and EOF", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all), + VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning), + VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore), - VC4_DEFINE_PACKET(VC4_PACKET_STORE_TILE_BUFFER_GENERAL, 0, 1, "Store Tile Buffer General", validate_loadstore_tile_buffer_general), - VC4_DEFINE_PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL, 0, 1, "Load Tile Buffer General", validate_loadstore_tile_buffer_general), + VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, "Indexed Primitive List", validate_indexed_prim_list), - VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 1, 1, "Indexed Primitive List", validate_indexed_prim_list), - - VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 1, 1, "Vertex Array Primitives", validate_gl_array_primitive), + VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, "Vertex Array Primitives", validate_gl_array_primitive), /* This is only used by clipped primitives (packets 48 and 49), which * we don't support parsing yet. 
*/ - VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, 1, 1, "primitive list format", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, 1, 1, "GL Shader State", validate_gl_shader_state), - VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, 1, 1, "NV Shader State", validate_nv_shader_state), + VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state), + VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state), - VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, 1, 1, "configuration bits", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, 1, 1, "flat shade flags", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, 1, 1, "point size", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, 1, 1, "line width", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, 1, 1, "RHT X boundary", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, 1, 1, "Depth Offset", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, 1, 1, "Clip Window", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, 1, 1, "Viewport Offset", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, 1, 1, "Clipper XY Scaling", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, "point size", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, "line width", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, "RHT X boundary", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, "Depth Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, "Clip Window", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, "Viewport Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, "Clipper XY Scaling", NULL), /* Note: The docs say this was also 105, but it was 106 in the * initial userland code 
drop. */ - VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, 1, 1, "Clipper Z Scale and Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, "Clipper Z Scale and Offset", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 1, 0, "tile binning configuration", validate_tile_binning_config), + VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, "tile binning configuration", validate_tile_binning_config), - VC4_DEFINE_PACKET(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 0, 1, "tile rendering mode configuration", validate_tile_rendering_mode_config), - - VC4_DEFINE_PACKET(VC4_PACKET_CLEAR_COLORS, 0, 1, "Clear Colors", NULL), - - VC4_DEFINE_PACKET(VC4_PACKET_TILE_COORDINATES, 0, 1, "Tile Coordinates", validate_tile_coordinates), - - VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, 1, 1, "GEM handles", validate_gem_handles), + VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, "GEM handles", validate_gem_handles), }; int -vc4_validate_cl(struct drm_device *dev, - void *validated, - void *unvalidated, - uint32_t len, - bool is_bin, - bool has_bin, - struct vc4_exec_info *exec) +vc4_validate_bin_cl(struct drm_device *dev, + void *validated, + void *unvalidated, + struct vc4_exec_info *exec) { + uint32_t len = exec->args->bin_cl_size; uint32_t dst_offset = 0; uint32_t src_offset = 0; @@ -734,14 +544,6 @@ vc4_validate_cl(struct drm_device *dev, src_offset, cmd, info->name, info->len); #endif - if ((is_bin && !info->bin) || - (!is_bin && !info->render)) { - DRM_ERROR("0x%08x: packet %d (%s) invalid for %s\n", - src_offset, cmd, info->name, - is_bin ? 
"binner" : "render"); - return -EINVAL; - } - if (src_offset + info->len > len) { DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x " "exceeds bounds (0x%08x)\n", @@ -772,30 +574,16 @@ vc4_validate_cl(struct drm_device *dev, break; } - if (is_bin) { - exec->ct0ea = exec->ct0ca + dst_offset; + exec->ct0ea = exec->ct0ca + dst_offset; - if (has_bin && !exec->found_start_tile_binning_packet) { - DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n"); - return -EINVAL; - } - } else { - if (!exec->found_tile_rendering_mode_config_packet) { - DRM_ERROR("Render CL missing VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n"); - return -EINVAL; - } + if (!exec->found_start_tile_binning_packet) { + DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n"); + return -EINVAL; + } - /* Make sure that they actually consumed the semaphore - * increment from the bin CL. Otherwise a later submit would - * have render execute immediately. - */ - if (exec->found_wait_on_semaphore_packet != has_bin) { - DRM_ERROR("Render CL %s VC4_PACKET_WAIT_ON_SEMAPHORE\n", - exec->found_wait_on_semaphore_packet ? 
- "has" : "missing"); - return -EINVAL; - } - exec->ct1ea = exec->ct1ca + dst_offset; + if (!exec->found_increment_semaphore_packet) { + DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE\n"); + return -EINVAL; } return 0; @@ -910,8 +698,8 @@ reloc_tex(struct vc4_exec_info *exec, tiling_format = VC4_TILING_FORMAT_T; } - if (!check_tex_size(exec, tex, offset + cube_map_stride * 5, - tiling_format, width, height, cpp)) { + if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5, + tiling_format, width, height, cpp)) { return false; } diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c index b3811025cc1..d29e2c9c318 100644 --- a/src/gallium/drivers/vc4/vc4_blit.c +++ b/src/gallium/drivers/vc4/vc4_blit.c @@ -26,87 +26,7 @@ #include "util/u_blitter.h" #include "vc4_context.h" -static void -vc4_tile_blit_color_rcl(struct vc4_context *vc4, - struct vc4_surface *dst_surf, - struct vc4_surface *src_surf) -{ - struct vc4_resource *src = vc4_resource(src_surf->base.texture); - struct vc4_resource *dst = vc4_resource(dst_surf->base.texture); - - uint32_t min_x_tile = 0; - uint32_t min_y_tile = 0; - uint32_t max_x_tile = (dst_surf->base.width - 1) / 64; - uint32_t max_y_tile = (dst_surf->base.height - 1) / 64; - uint32_t xtiles = max_x_tile - min_x_tile + 1; - uint32_t ytiles = max_y_tile - min_y_tile + 1; - cl_ensure_space(&vc4->rcl, - (VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE + - VC4_PACKET_GEM_HANDLES_SIZE) + - xtiles * ytiles * ((VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE + - VC4_PACKET_GEM_HANDLES_SIZE) * 2 + - VC4_PACKET_TILE_COORDINATES_SIZE)); - cl_ensure_space(&vc4->bo_handles, 2 * sizeof(uint32_t)); - cl_ensure_space(&vc4->bo_pointers, 2 * sizeof(struct vc4_bo *)); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); - cl_reloc(vc4, &vc4->rcl, dst->bo, dst_surf->offset); - cl_u16(&vc4->rcl, dst_surf->base.width); - cl_u16(&vc4->rcl, dst_surf->base.height); - cl_u16(&vc4->rcl, - 
VC4_SET_FIELD(dst_surf->tiling, - VC4_RENDER_CONFIG_MEMORY_FORMAT) | - VC4_SET_FIELD(vc4_rt_format_is_565(dst_surf->base.format) ? - VC4_RENDER_CONFIG_FORMAT_BGR565 : - VC4_RENDER_CONFIG_FORMAT_RGBA8888, - VC4_RENDER_CONFIG_FORMAT)); - - uint32_t src_hindex = vc4_gem_hindex(vc4, src->bo); - - for (int y = min_y_tile; y <= max_y_tile; y++) { - for (int x = min_x_tile; x <= max_x_tile; x++) { - bool end_of_frame = (x == max_x_tile && - y == max_y_tile); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, - VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_SET_FIELD(src_surf->tiling, - VC4_LOADSTORE_TILE_BUFFER_TILING) | - VC4_SET_FIELD(vc4_rt_format_is_565(src_surf->base.format) ? - VC4_LOADSTORE_TILE_BUFFER_BGR565 : - VC4_LOADSTORE_TILE_BUFFER_RGBA8888, - VC4_LOADSTORE_TILE_BUFFER_FORMAT)); - cl_reloc_hindex(&vc4->rcl, src_hindex, - src_surf->offset); - - cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); - cl_u8(&vc4->rcl, x); - cl_u8(&vc4->rcl, y); - - if (end_of_frame) { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); - } else { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER); - } - } - } - - vc4->draw_min_x = 0; - vc4->draw_min_y = 0; - vc4->draw_max_x = dst_surf->base.width; - vc4->draw_max_y = dst_surf->base.height; - - dst->writes++; - vc4->needs_flush = true; -} - -static struct vc4_surface * +static struct pipe_surface * vc4_get_blit_surface(struct pipe_context *pctx, struct pipe_resource *prsc, unsigned level) { @@ -118,7 +38,7 @@ vc4_get_blit_surface(struct pipe_context *pctx, tmpl.u.tex.first_layer = 0; tmpl.u.tex.last_layer = 0; - return vc4_surface(pctx->create_surface(pctx, prsc, &tmpl)); + return pctx->create_surface(pctx, prsc, &tmpl); } static bool @@ -142,17 +62,28 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) if (info->dst.resource->format != info->src.resource->format) return false; - struct 
vc4_surface *dst_surf = + vc4_flush(pctx); + + struct pipe_surface *dst_surf = vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level); - struct vc4_surface *src_surf = + struct pipe_surface *src_surf = vc4_get_blit_surface(pctx, info->src.resource, info->src.level); - vc4_flush(pctx); - vc4_tile_blit_color_rcl(vc4, dst_surf, src_surf); + pipe_surface_reference(&vc4->color_read, src_surf); + pipe_surface_reference(&vc4->color_write, dst_surf); + pipe_surface_reference(&vc4->zs_read, NULL); + pipe_surface_reference(&vc4->zs_write, NULL); + vc4->draw_min_x = 0; + vc4->draw_min_y = 0; + vc4->draw_max_x = dst_surf->width; + vc4->draw_max_y = dst_surf->height; + vc4->draw_width = dst_surf->width; + vc4->draw_height = dst_surf->height; + vc4->needs_flush = true; vc4_job_submit(vc4); - pctx->surface_destroy(pctx, &dst_surf->base); - pctx->surface_destroy(pctx, &src_surf->base); + pipe_surface_reference(&dst_surf, NULL); + pipe_surface_reference(&src_surf, NULL); return true; } diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 10b58b0d815..ebd357f7065 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -37,271 +37,12 @@ #include "vc4_context.h" #include "vc4_resource.h" -/** - * Emits a no-op STORE_TILE_BUFFER_GENERAL. - * - * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of - * some sort before another load is triggered. 
- */ -static void -vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted) -{ - if (!*coords_emitted) - return; - - cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE, - VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | - VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | - VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR); - cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */ - - *coords_emitted = false; -} - -/** - * Emits a PACKET_TILE_COORDINATES if one isn't already pending. - * - * The tile coordinates packet triggers a pending load if there is one, are - * used for clipping during rendering, and determine where loads/stores happen - * relative to their base address. - */ -static void -vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y, - bool *coords_emitted) -{ - if (*coords_emitted) - return; - - cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); - cl_u8(&vc4->rcl, x); - cl_u8(&vc4->rcl, y); - - *coords_emitted = true; -} - -static void -vc4_setup_rcl(struct vc4_context *vc4) -{ - struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); - struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL; - struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf); - struct vc4_resource *ztex = zsurf ? 
vc4_resource(zsurf->base.texture) : NULL; - - if (!csurf) - vc4->resolve &= ~PIPE_CLEAR_COLOR0; - if (!zsurf) - vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); - uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared; - uint32_t width = vc4->framebuffer.width; - uint32_t height = vc4->framebuffer.height; - uint32_t stride_in_tiles = align(width, 64) / 64; - - assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0); - uint32_t min_x_tile = vc4->draw_min_x / 64; - uint32_t min_y_tile = vc4->draw_min_y / 64; - uint32_t max_x_tile = (vc4->draw_max_x - 1) / 64; - uint32_t max_y_tile = (vc4->draw_max_y - 1) / 64; - uint32_t xtiles = max_x_tile - min_x_tile + 1; - uint32_t ytiles = max_y_tile - min_y_tile + 1; - -#if 0 - fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n", - vc4->resolve, - vc4->cleared, - resolve_uncleared); -#endif - - cl_ensure_space(&vc4->rcl, - VC4_PACKET_CLEAR_COLORS_SIZE + - (VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE + - VC4_PACKET_GEM_HANDLES_SIZE) + - (VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE + - VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE) + - VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE + - xtiles * ytiles * ((VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE + - VC4_PACKET_GEM_HANDLES_SIZE) * 4 + - VC4_PACKET_TILE_COORDINATES_SIZE * 3 + - (VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE + - VC4_PACKET_GEM_HANDLES_SIZE) + - VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE)); - - if (vc4->cleared) { - cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS); - cl_u32(&vc4->rcl, vc4->clear_color[0]); - cl_u32(&vc4->rcl, vc4->clear_color[1]); - cl_u32(&vc4->rcl, vc4->clear_depth); - cl_u8(&vc4->rcl, vc4->clear_stencil); - } - - /* The rendering mode config determines the pointer that's used for - * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. 
The kernel - * could handle a no-relocation rendering mode config and deny those - * packets, but instead we just tell the kernel we're doing our color - * rendering to the Z buffer, and just don't emit any of those - * packets. - */ - struct vc4_surface *render_surf = csurf ? csurf : zsurf; - struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture); - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); - cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset); - cl_u16(&vc4->rcl, width); - cl_u16(&vc4->rcl, height); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(render_surf->tiling, - VC4_RENDER_CONFIG_MEMORY_FORMAT) | - VC4_SET_FIELD((vc4_rt_format_is_565(render_surf->base.format) ? - VC4_RENDER_CONFIG_FORMAT_BGR565 : - VC4_RENDER_CONFIG_FORMAT_RGBA8888), - VC4_RENDER_CONFIG_FORMAT)); - - /* The tile buffer normally gets cleared when the previous tile is - * stored. If the clear values changed between frames, then the tile - * buffer has stale clear values in it, so we have to do a store in - * None mode (no writes) so that we trigger the tile buffer clear. - * - * Excess clearing is only a performance cost, since per-tile contents - * will be loaded/stored in the loop below. - */ - if (vc4->cleared & (PIPE_CLEAR_COLOR0 | - PIPE_CLEAR_DEPTH | - PIPE_CLEAR_STENCIL)) { - cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); - cl_u8(&vc4->rcl, 0); - cl_u8(&vc4->rcl, 0); - - cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE); - cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */ - } - - uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0; - uint32_t depth_hindex = ztex ? 
vc4_gem_hindex(vc4, ztex->bo) : 0; - uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc); - - for (int y = min_y_tile; y <= max_y_tile; y++) { - for (int x = min_x_tile; x <= max_x_tile; x++) { - bool end_of_frame = (x == max_x_tile && - y == max_y_tile); - bool coords_emitted = false; - - /* Note that the load doesn't actually occur until the - * tile coords packet is processed, and only one load - * may be outstanding at a time. - */ - if (resolve_uncleared & PIPE_CLEAR_COLOR) { - vc4_store_before_load(vc4, &coords_emitted); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, - VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_SET_FIELD(csurf->tiling, - VC4_LOADSTORE_TILE_BUFFER_TILING) | - VC4_SET_FIELD(vc4_rt_format_is_565(csurf->base.format) ? - VC4_LOADSTORE_TILE_BUFFER_BGR565 : - VC4_LOADSTORE_TILE_BUFFER_RGBA8888, - VC4_LOADSTORE_TILE_BUFFER_FORMAT)); - cl_reloc_hindex(&vc4->rcl, color_hindex, - csurf->offset); - - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - } - - if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - vc4_store_before_load(vc4, &coords_emitted); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, - VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_SET_FIELD(zsurf->tiling, - VC4_LOADSTORE_TILE_BUFFER_TILING)); - cl_reloc_hindex(&vc4->rcl, depth_hindex, - zsurf->offset); - - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - } - - /* Clipping depends on tile coordinates having been - * emitted, so make sure it's happened even if - * everything was cleared to start. - */ - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - - /* Wait for the binner before jumping to the first - * tile's lists. 
- */ - if (x == min_x_tile && y == min_y_tile) - cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST); - cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex, - (y * stride_in_tiles + x) * 32); - - if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, - VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_SET_FIELD(zsurf->tiling, - VC4_LOADSTORE_TILE_BUFFER_TILING) | - VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR); - cl_reloc_hindex(&vc4->rcl, depth_hindex, - zsurf->offset | - ((end_of_frame && - !(vc4->resolve & PIPE_CLEAR_COLOR0)) ? - VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); - - coords_emitted = false; - } - - if (vc4->resolve & PIPE_CLEAR_COLOR0) { - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - if (end_of_frame) { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); - } else { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER); - } - - coords_emitted = false; - } - - /* One of the bits needs to have been set that would - * have triggered an EOF. - */ - assert(vc4->resolve & (PIPE_CLEAR_COLOR0 | - PIPE_CLEAR_DEPTH | - PIPE_CLEAR_STENCIL)); - /* Any coords emitted must also have been consumed by - * a store. - */ - assert(!coords_emitted); - } - } - - if (vc4->resolve & PIPE_CLEAR_COLOR0) - ctex->writes++; - - if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) - ztex->writes++; -} - void vc4_flush(struct pipe_context *pctx) { struct vc4_context *vc4 = vc4_context(pctx); + struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0]; + struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf; if (!vc4->needs_flush) return; @@ -324,7 +65,31 @@ vc4_flush(struct pipe_context *pctx) /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. 
*/ cl_u8(&vc4->bcl, VC4_PACKET_FLUSH); - vc4_setup_rcl(vc4); + if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) { + pipe_surface_reference(&vc4->color_write, cbuf); + if (!(vc4->cleared & PIPE_CLEAR_COLOR0)) { + pipe_surface_reference(&vc4->color_read, cbuf); + } else { + pipe_surface_reference(&vc4->color_read, NULL); + } + + } else { + pipe_surface_reference(&vc4->color_write, NULL); + pipe_surface_reference(&vc4->color_read, NULL); + } + + if (vc4->framebuffer.zsbuf && + (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + pipe_surface_reference(&vc4->zs_write, zsbuf); + if (!(vc4->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + pipe_surface_reference(&vc4->zs_read, zsbuf); + } else { + pipe_surface_reference(&vc4->zs_read, NULL); + } + } else { + pipe_surface_reference(&vc4->zs_write, NULL); + pipe_surface_reference(&vc4->zs_read, NULL); + } vc4_job_submit(vc4); } diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 41dacb9172d..ad5d0b153ff 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -178,12 +178,18 @@ struct vc4_context { struct vc4_screen *screen; struct vc4_cl bcl; - struct vc4_cl rcl; struct vc4_cl shader_rec; struct vc4_cl uniforms; struct vc4_cl bo_handles; struct vc4_cl bo_pointers; uint32_t shader_rec_count; + + /** @{ Surfaces to submit rendering for. */ + struct pipe_surface *color_read; + struct pipe_surface *color_write; + struct pipe_surface *zs_read; + struct pipe_surface *zs_write; + /** @} */ /** @{ * Bounding box of the scissor across all queued drawing. * @@ -194,6 +200,13 @@ struct vc4_context { uint32_t draw_max_x; uint32_t draw_max_y; /** @} */ + /** @{ + * Width/height of the color framebuffer being rendered to, + * for VC4_TILE_RENDERING_MODE_CONFIG. 
+ */ + uint32_t draw_width; + uint32_t draw_height; + /** @} */ struct vc4_bo *tile_alloc; struct vc4_bo *tile_state; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 15743ea7671..3e181d0606a 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -129,6 +129,8 @@ vc4_start_draw(struct vc4_context *vc4) vc4->needs_flush = true; vc4->draw_call_queued = true; + vc4->draw_width = width; + vc4->draw_height = height; } static void diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h index 062fd3b687e..5f1ee4fa125 100644 --- a/src/gallium/drivers/vc4/vc4_drm.h +++ b/src/gallium/drivers/vc4/vc4_drm.h @@ -38,6 +38,15 @@ #define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) +struct drm_vc4_submit_rcl_surface { + uint32_t hindex; /* Handle index, or ~0 if not present. */ + uint32_t offset; /* Offset to start of buffer. */ + /* + * Bits for either render config (color_ms_write) or load/store packet. + */ + uint16_t bits; + uint16_t pad; +}; /** * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D @@ -62,16 +71,6 @@ struct drm_vc4_submit_cl { */ uint64_t bin_cl; - /* Pointer to the render command list. - * - * The render command list contains a set of packets to load the - * current tile's state (reading from memory, or just clearing it) - * into the GPU, then call into the tile allocation BO to run the - * stored rendering for that tile, then store the tile's state back to - * memory. - */ - uint64_t render_cl; - /* Pointer to the shader records. * * Shader records are the structures read by the hardware that contain @@ -102,8 +101,6 @@ struct drm_vc4_submit_cl { /* Size in bytes of the binner command list. 
*/ uint32_t bin_cl_size; - /* Size in bytes of the render command list */ - uint32_t render_cl_size; /* Size in bytes of the set of shader records. */ uint32_t shader_rec_size; /* Number of shader records. @@ -119,8 +116,25 @@ struct drm_vc4_submit_cl { /* Number of BO handles passed in (size is that times 4). */ uint32_t bo_handle_count; + /* RCL setup: */ + uint16_t width; + uint16_t height; + uint8_t min_x_tile; + uint8_t min_y_tile; + uint8_t max_x_tile; + uint8_t max_y_tile; + struct drm_vc4_submit_rcl_surface color_read; + struct drm_vc4_submit_rcl_surface color_ms_write; + struct drm_vc4_submit_rcl_surface zs_read; + struct drm_vc4_submit_rcl_surface zs_write; + uint32_t clear_color[2]; + uint32_t clear_z; + uint8_t clear_s; + + uint32_t pad:24; + +#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) uint32_t flags; - uint32_t pad; /* Returned value of the seqno of this render job (for the * wait ioctl). diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index 76037162102..dcade15443a 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -33,7 +33,6 @@ void vc4_job_init(struct vc4_context *vc4) { vc4_init_cl(vc4, &vc4->bcl); - vc4_init_cl(vc4, &vc4->rcl); vc4_init_cl(vc4, &vc4->shader_rec); vc4_init_cl(vc4, &vc4->uniforms); vc4_init_cl(vc4, &vc4->bo_handles); @@ -50,7 +49,6 @@ vc4_job_reset(struct vc4_context *vc4) vc4_bo_unreference(&referenced_bos[i]); } vc4_reset_cl(&vc4->bcl); - vc4_reset_cl(&vc4->rcl); vc4_reset_cl(&vc4->shader_rec); vc4_reset_cl(&vc4->uniforms); vc4_reset_cl(&vc4->bo_handles); @@ -75,6 +73,70 @@ vc4_job_reset(struct vc4_context *vc4) vc4->draw_max_y = 0; } +static void +vc4_submit_setup_rcl_surface(struct vc4_context *vc4, + struct drm_vc4_submit_rcl_surface *submit_surf, + struct pipe_surface *psurf, + bool is_depth, bool is_write) +{ + struct vc4_surface *surf = vc4_surface(psurf); + + if (!surf) { + submit_surf->hindex = ~0; + return; + } + + struct vc4_resource *rsc = 
vc4_resource(psurf->texture); + submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); + submit_surf->offset = surf->offset; + + if (is_depth) { + submit_surf->bits = + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, + VC4_LOADSTORE_TILE_BUFFER_BUFFER); + + } else { + submit_surf->bits = + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ? + VC4_LOADSTORE_TILE_BUFFER_BGR565 : + VC4_LOADSTORE_TILE_BUFFER_RGBA8888, + VC4_LOADSTORE_TILE_BUFFER_FORMAT); + } + submit_surf->bits |= + VC4_SET_FIELD(surf->tiling, VC4_LOADSTORE_TILE_BUFFER_TILING); + + if (is_write) + rsc->writes++; +} + +static void +vc4_submit_setup_ms_rcl_surface(struct vc4_context *vc4, + struct drm_vc4_submit_rcl_surface *submit_surf, + struct pipe_surface *psurf) +{ + struct vc4_surface *surf = vc4_surface(psurf); + + if (!surf) { + submit_surf->hindex = ~0; + return; + } + + struct vc4_resource *rsc = vc4_resource(psurf->texture); + submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); + submit_surf->offset = surf->offset; + + submit_surf->bits = + VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ? + VC4_RENDER_CONFIG_FORMAT_BGR565 : + VC4_RENDER_CONFIG_FORMAT_RGBA8888, + VC4_RENDER_CONFIG_FORMAT) | + VC4_SET_FIELD(surf->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); + + rsc->writes++; +} + /** * Submits the job to the kernel and then reinitializes it. 
*/ @@ -84,26 +146,49 @@ vc4_job_submit(struct vc4_context *vc4) if (vc4_debug & VC4_DEBUG_CL) { fprintf(stderr, "BCL:\n"); vc4_dump_cl(vc4->bcl.base, vc4->bcl.next - vc4->bcl.base, false); - fprintf(stderr, "RCL:\n"); - vc4_dump_cl(vc4->rcl.base, vc4->rcl.next - vc4->rcl.base, true); } struct drm_vc4_submit_cl submit; memset(&submit, 0, sizeof(submit)); + cl_ensure_space(&vc4->bo_handles, 4 * sizeof(uint32_t)); + cl_ensure_space(&vc4->bo_pointers, 4 * sizeof(struct vc4_bo *)); + + vc4_submit_setup_rcl_surface(vc4, &submit.color_read, + vc4->color_read, false, false); + vc4_submit_setup_ms_rcl_surface(vc4, &submit.color_ms_write, + vc4->color_write); + vc4_submit_setup_rcl_surface(vc4, &submit.zs_read, + vc4->zs_read, true, false); + vc4_submit_setup_rcl_surface(vc4, &submit.zs_write, + vc4->zs_write, true, true); + submit.bo_handles = (uintptr_t)vc4->bo_handles.base; submit.bo_handle_count = (vc4->bo_handles.next - vc4->bo_handles.base) / 4; submit.bin_cl = (uintptr_t)vc4->bcl.base; submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base; - submit.render_cl = (uintptr_t)vc4->rcl.base; - submit.render_cl_size = vc4->rcl.next - vc4->rcl.base; submit.shader_rec = (uintptr_t)vc4->shader_rec.base; submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base; submit.shader_rec_count = vc4->shader_rec_count; submit.uniforms = (uintptr_t)vc4->uniforms.base; submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base; + assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0); + submit.min_x_tile = vc4->draw_min_x / 64; + submit.min_y_tile = vc4->draw_min_y / 64; + submit.max_x_tile = (vc4->draw_max_x - 1) / 64; + submit.max_y_tile = (vc4->draw_max_y - 1) / 64; + submit.width = vc4->draw_width; + submit.height = vc4->draw_height; + if (vc4->cleared) { + submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; + submit.clear_color[0] = vc4->clear_color[0]; + submit.clear_color[1] = vc4->clear_color[1]; + submit.clear_z = vc4->clear_depth; + submit.clear_s = vc4->clear_stencil; + } 
+ if (!(vc4_debug & VC4_DEBUG_NORAST)) { int ret; From 1d45e44b2f9e52d6eebe84ab08da6b7393011f95 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 17 Jun 2015 13:24:06 -0700 Subject: [PATCH 701/834] vc4: Move tile state/alloc allocation into the kernel. This avoids a security issue where userspace could have written the tile state/tile alloc behind the GPU's back, and will apparently be necessary for fixing stability bugs (tile state buffers are missing some top bits for the tile alloc's address). --- src/gallium/drivers/vc4/kernel/vc4_drv.h | 5 +- src/gallium/drivers/vc4/kernel/vc4_packet.h | 20 ++-- .../drivers/vc4/kernel/vc4_render_cl.c | 3 +- src/gallium/drivers/vc4/kernel/vc4_validate.c | 97 +++++++++---------- src/gallium/drivers/vc4/vc4_context.c | 2 - src/gallium/drivers/vc4/vc4_context.h | 3 - src/gallium/drivers/vc4/vc4_draw.c | 37 +------ src/gallium/drivers/vc4/vc4_simulator.c | 1 + .../drivers/vc4/vc4_simulator_validate.h | 1 + 9 files changed, 70 insertions(+), 99 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h index 83802dd774a..1fd8aa9fb28 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_drv.h +++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h @@ -28,8 +28,6 @@ enum vc4_bo_mode { VC4_MODE_UNDECIDED, - VC4_MODE_TILE_ALLOC, - VC4_MODE_TSDA, VC4_MODE_RENDER, VC4_MODE_SHADER, }; @@ -91,7 +89,8 @@ struct vc4_exec_info { bool found_start_tile_binning_packet; bool found_increment_semaphore_packet; uint8_t bin_tiles_x, bin_tiles_y; - struct drm_gem_cma_object *tile_alloc_bo; + struct drm_gem_cma_object *tile_bo; + uint32_t tile_alloc_offset; /** * Computed addresses pointing into exec_bo where we start the diff --git a/src/gallium/drivers/vc4/kernel/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h index 764a125c6e8..88cfc0fa9f0 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_packet.h +++ b/src/gallium/drivers/vc4/kernel/vc4_packet.h @@ -232,15 +232,19 @@ enum vc4_packet { /** @{ bits in 
the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */ #define VC4_BIN_CONFIG_DB_NON_MS (1 << 7) -#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 (0 << 5) -#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 (1 << 5) -#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 (2 << 5) -#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 (3 << 5) +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5) +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 0 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 1 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 2 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 3 -#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 (0 << 3) -#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 (1 << 3) -#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 (2 << 3) -#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 (3 << 3) +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK VC4_MASK(4, 3) +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 0 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 1 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3 #define VC4_BIN_CONFIG_AUTO_INIT_TSDA (1 << 2) #define VC4_BIN_CONFIG_TILE_BUFFER_64BIT (1 << 1) diff --git a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c index de6070fec72..e2d907ad91f 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c +++ b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c @@ -140,7 +140,8 @@ static void emit_tile(struct vc4_exec_info *exec, if (has_bin) { rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST); - rcl_u32(setup, (exec->tile_alloc_bo->paddr + + rcl_u32(setup, (exec->tile_bo->paddr + + exec->tile_alloc_offset + (y * exec->bin_tiles_x + x) * 32)); } diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 80b0e653d80..a0b67a7e50b 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c 
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -375,15 +375,10 @@ validate_nv_shader_state(VALIDATE_ARGS) static int validate_tile_binning_config(VALIDATE_ARGS) { - struct drm_gem_cma_object *tile_allocation; - struct drm_gem_cma_object *tile_state_data_array; + struct drm_device *dev = exec->exec_bo->base.dev; uint8_t flags; - uint32_t tile_allocation_size; - uint32_t tile_alloc_init_block_size; - - if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &tile_allocation) || - !vc4_use_handle(exec, 1, VC4_MODE_TSDA, &tile_state_data_array)) - return -EINVAL; + uint32_t tile_state_size, tile_alloc_size; + uint32_t tile_count; if (exec->found_tile_binning_mode_config_packet) { DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); @@ -393,6 +388,7 @@ validate_tile_binning_config(VALIDATE_ARGS) exec->bin_tiles_x = *(uint8_t *)(untrusted + 12); exec->bin_tiles_y = *(uint8_t *)(untrusted + 13); + tile_count = exec->bin_tiles_x * exec->bin_tiles_y; flags = *(uint8_t *)(untrusted + 14); if (exec->bin_tiles_x == 0 || @@ -402,15 +398,6 @@ validate_tile_binning_config(VALIDATE_ARGS) return -EINVAL; } - /* Our validation relies on the user not getting to set up their own - * tile state/tile allocation BO contents. 
- */ - if (!(flags & VC4_BIN_CONFIG_AUTO_INIT_TSDA)) { - DRM_ERROR("binning config missing " - "VC4_BIN_CONFIG_AUTO_INIT_TSDA\n"); - return -EINVAL; - } - if (flags & (VC4_BIN_CONFIG_DB_NON_MS | VC4_BIN_CONFIG_TILE_BUFFER_64BIT | VC4_BIN_CONFIG_MS_MODE_4X)) { @@ -418,40 +405,52 @@ validate_tile_binning_config(VALIDATE_ARGS) return -EINVAL; } - if (*(uint32_t *)(untrusted + 0) != 0) { - DRM_ERROR("tile allocation offset != 0 unsupported\n"); - return -EINVAL; - } - tile_allocation_size = *(uint32_t *)(untrusted + 4); - if (tile_allocation_size > tile_allocation->base.size) { - DRM_ERROR("tile allocation size %d > BO size %d\n", - tile_allocation_size, tile_allocation->base.size); - return -EINVAL; - } - *(uint32_t *)validated = tile_allocation->paddr; - exec->tile_alloc_bo = tile_allocation; + /* The tile state data array is 48 bytes per tile, and we put it at + * the start of a BO containing both it and the tile alloc. + */ + tile_state_size = 48 * tile_count; - tile_alloc_init_block_size = 1 << (5 + ((flags >> 5) & 3)); - if (exec->bin_tiles_x * exec->bin_tiles_y * - tile_alloc_init_block_size > tile_allocation_size) { - DRM_ERROR("tile init exceeds tile alloc size (%d vs %d)\n", - exec->bin_tiles_x * exec->bin_tiles_y * - tile_alloc_init_block_size, - tile_allocation_size); - return -EINVAL; - } + /* Since the tile alloc array will follow us, align. 
*/ + exec->tile_alloc_offset = roundup(tile_state_size, 4096); - if (*(uint32_t *)(untrusted + 8) != 0) { - DRM_ERROR("TSDA offset != 0 unsupported\n"); - return -EINVAL; - } - if (exec->bin_tiles_x * exec->bin_tiles_y * 48 > - tile_state_data_array->base.size) { - DRM_ERROR("TSDA of %db too small for %dx%d bin config\n", - tile_state_data_array->base.size, - exec->bin_tiles_x, exec->bin_tiles_y); - } - *(uint32_t *)(validated + 8) = tile_state_data_array->paddr; + *(uint8_t *)(validated + 14) = + ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK | + VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) | + VC4_BIN_CONFIG_AUTO_INIT_TSDA | + VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32, + VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) | + VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128, + VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE)); + + /* Initial block size. */ + tile_alloc_size = 32 * tile_count; + + /* + * The initial allocation gets rounded to the next 256 bytes before + * the hardware starts fulfilling further allocations. + */ + tile_alloc_size = roundup(tile_alloc_size, 256); + + /* Add space for the extra allocations. This is what gets used first, + * before overflow memory. It must have at least 4096 bytes, but we + * want to avoid overflow memory usage if possible. + */ + tile_alloc_size += 1024 * 1024; + + exec->tile_bo = drm_gem_cma_create(dev, exec->tile_alloc_offset + + tile_alloc_size); + if (!exec->tile_bo) + return -ENOMEM; + list_addtail(&to_vc4_bo(&exec->tile_bo->base)->unref_head, + &exec->unref_list); + + /* tile alloc address. */ + *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr + + exec->tile_alloc_offset); + /* tile alloc size. */ + *(uint32_t *)(validated + 4) = tile_alloc_size; + /* tile state address. 
*/ + *(uint32_t *)(validated + 8) = exec->tile_bo->paddr; return 0; } diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index ebd357f7065..630f8e68896 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -184,8 +184,6 @@ vc4_context_destroy(struct pipe_context *pctx) pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL); pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL); - vc4_bo_unreference(&vc4->tile_alloc); - vc4_bo_unreference(&vc4->tile_state); vc4_program_fini(pctx); diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index ad5d0b153ff..d5d6be16f6e 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -208,9 +208,6 @@ struct vc4_context { uint32_t draw_height; /** @} */ - struct vc4_bo *tile_alloc; - struct vc4_bo *tile_state; - struct util_slab_mempool transfer_pool; struct blitter_context *blitter; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 3e181d0606a..5e6d70d6f33 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -72,44 +72,15 @@ vc4_start_draw(struct vc4_context *vc4) uint32_t tilew = align(width, 64) / 64; uint32_t tileh = align(height, 64) / 64; - /* Tile alloc memory setup: We use an initial alloc size of 32b. The - * hardware then aligns that to 256b (we use 4096, because all of our - * BO allocations align to that anyway), then for some reason the - * simulator wants an extra page available, even if you have overflow - * memory set up. - * - * XXX: The binner only does 28-bit addressing math, so the tile alloc - * and tile state should be in the same BO and that BO needs to not - * cross a 256MB boundary, somehow. 
- */ - uint32_t tile_alloc_size = 32 * tilew * tileh; - tile_alloc_size = align(tile_alloc_size, 4096); - tile_alloc_size += 4096; - uint32_t tile_state_size = 48 * tilew * tileh; - if (!vc4->tile_alloc || vc4->tile_alloc->size < tile_alloc_size) { - vc4_bo_unreference(&vc4->tile_alloc); - vc4->tile_alloc = vc4_bo_alloc(vc4->screen, tile_alloc_size, - "tile_alloc"); - } - if (!vc4->tile_state || vc4->tile_state->size < tile_state_size) { - vc4_bo_unreference(&vc4->tile_state); - vc4->tile_state = vc4_bo_alloc(vc4->screen, tile_state_size, - "tile_state"); - } - // Tile state data is 48 bytes per tile, I think it can be thrown away // as soon as binning is finished. - cl_start_reloc(&vc4->bcl, 2); cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG); - cl_reloc(vc4, &vc4->bcl, vc4->tile_alloc, 0); - cl_u32(&vc4->bcl, vc4->tile_alloc->size); - cl_reloc(vc4, &vc4->bcl, vc4->tile_state, 0); + cl_u32(&vc4->bcl, 0); /* tile alloc addr, filled by kernel */ + cl_u32(&vc4->bcl, 0); /* tile alloc size, filled by kernel */ + cl_u32(&vc4->bcl, 0); /* tile state addr, filled by kernel */ cl_u8(&vc4->bcl, tilew); cl_u8(&vc4->bcl, tileh); - cl_u8(&vc4->bcl, - VC4_BIN_CONFIG_AUTO_INIT_TSDA | - VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 | - VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32); + cl_u8(&vc4->bcl, 0); /* flags, filled by kernel. 
*/ /* START_TILE_BINNING resets the statechange counters in the hardware, * which are what is used when a primitive is binned to a tile to diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 2e4d8798f8e..b58013dd2ee 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -45,6 +45,7 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo) drm_bo->bo = bo; obj->base.size = size; + obj->base.dev = dev; obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next; obj->paddr = simpenrose_hw_addr(obj->vaddr); diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h index c3b7a638f93..2bb36b253bb 100644 --- a/src/gallium/drivers/vc4/vc4_simulator_validate.h +++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h @@ -66,6 +66,7 @@ struct drm_device { struct drm_gem_object { uint32_t size; + struct drm_device *dev; }; struct drm_gem_cma_object { From 997fc807b2f71ef65b4601d6db33d0f912c18d3f Mon Sep 17 00:00:00 2001 From: Boyan Ding Date: Tue, 16 Jun 2015 11:08:33 +0800 Subject: [PATCH 702/834] egl/x11: Set version of swrastLoader to 2 which it actually implements instead of the newest version defined in dri_interface.h Cc: "10.5 10.6" Signed-off-by: Boyan Ding Reviewed-by: Ian Romanick Reviewed-by: Emil Velikov --- src/egl/drivers/dri2/platform_x11.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index e0d0fdc8205..2d5b71746e8 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -1112,7 +1112,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp) goto cleanup_conn; dri2_dpy->swrast_loader_extension.base.name = __DRI_SWRAST_LOADER; - dri2_dpy->swrast_loader_extension.base.version = __DRI_SWRAST_LOADER_VERSION; + dri2_dpy->swrast_loader_extension.base.version = 2; 
dri2_dpy->swrast_loader_extension.getDrawableInfo = swrastGetDrawableInfo; dri2_dpy->swrast_loader_extension.putImage = swrastPutImage; dri2_dpy->swrast_loader_extension.getImage = swrastGetImage; From 6b0378e483ba53359545ac8b774dbdd81c2fab3f Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 18 Jun 2015 12:59:28 +0100 Subject: [PATCH 703/834] ilo: remove missing ilo_fence.h from the sources list Signed-off-by: Emil Velikov --- src/gallium/drivers/ilo/Makefile.sources | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 68870f44337..e5a0950dc7c 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -17,7 +17,6 @@ C_SOURCES := \ core/ilo_dev.h \ core/ilo_format.c \ core/ilo_format.h \ - core/ilo_fence.h \ core/ilo_image.c \ core/ilo_image.h \ core/ilo_state_cc.c \ From c40e7ee7c47cb24264fd77ef37fab99dea4c299a Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 17 Jun 2015 15:07:14 -0400 Subject: [PATCH 704/834] glsl: handle conversions to double when comparing param matches This allows mod(int, int) to become selected as float mod when doubles are supported. 
Signed-off-by: Ilia Mirkin Reviewed-by: Chris Forbes Cc: "10.6" --- src/glsl/ir_function.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index 2b2643c64a2..13194439003 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -148,9 +148,11 @@ get_parameter_match_type(const ir_variable *param, if (from_type == to_type) return PARAMETER_EXACT_MATCH; - /* XXX: When ARB_gpu_shader_fp64 support is added, check for float->double, - * and int/uint->double conversions - */ + if (to_type->base_type == GLSL_TYPE_DOUBLE) { + if (from_type->base_type == GLSL_TYPE_FLOAT) + return PARAMETER_FLOAT_TO_DOUBLE; + return PARAMETER_INT_TO_DOUBLE; + } if (to_type->base_type == GLSL_TYPE_FLOAT) return PARAMETER_INT_TO_FLOAT; From b6e238023c4f8af2328dc3bcab1d73a3e19f4fbb Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 17 Jun 2015 15:09:26 -0400 Subject: [PATCH 705/834] glsl: add version checks to conditionals for builtin variable enablement A number of builtin variables have checks based on the extension being enabled, but were missing enablement via a higher GLSL version. 
Signed-off-by: Ilia Mirkin Reviewed-by: Timothy Arceri Cc: "10.5 10.6" --- src/glsl/builtin_variables.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp index 6806aa1f962..c52b2526797 100644 --- a/src/glsl/builtin_variables.cpp +++ b/src/glsl/builtin_variables.cpp @@ -876,9 +876,9 @@ void builtin_variable_generator::generate_gs_special_vars() { add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); - if (state->ARB_viewport_array_enable) + if (state->is_version(410, 0) || state->ARB_viewport_array_enable) add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); - if (state->ARB_gpu_shader5_enable) + if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable) add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID"); /* Although gl_PrimitiveID appears in tessellation control and tessellation @@ -946,7 +946,7 @@ builtin_variable_generator::generate_fs_special_vars() var->enable_extension_warning("GL_AMD_shader_stencil_export"); } - if (state->ARB_sample_shading_enable) { + if (state->is_version(400, 0) || state->ARB_sample_shading_enable) { add_system_value(SYSTEM_VALUE_SAMPLE_ID, int_t, "gl_SampleID"); add_system_value(SYSTEM_VALUE_SAMPLE_POS, vec2_t, "gl_SamplePosition"); /* From the ARB_sample_shading specification: @@ -959,11 +959,11 @@ builtin_variable_generator::generate_fs_special_vars() add_output(FRAG_RESULT_SAMPLE_MASK, array(int_t, 1), "gl_SampleMask"); } - if (state->ARB_gpu_shader5_enable) { + if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable) { add_system_value(SYSTEM_VALUE_SAMPLE_MASK_IN, array(int_t, 1), "gl_SampleMaskIn"); } - if (state->ARB_fragment_layer_viewport_enable) { + if (state->is_version(430, 0) || state->ARB_fragment_layer_viewport_enable) { add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer"); add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); } From 770f141866654dab969302f720228497f0fb35fd Mon Sep 17 
00:00:00 2001 From: Ilia Mirkin Date: Wed, 17 Jun 2015 23:00:44 -0400 Subject: [PATCH 706/834] mesa: add GL_PROGRAM_PIPELINE support in KHR_debug calls This was apparently missed when ARB_sso support was added. Add label support to pipeline objects just like all the other debug-related objects. Signed-off-by: Ilia Mirkin Reviewed-by: Timothy Arceri Cc: "10.5 10.6" --- src/mesa/main/mtypes.h | 2 ++ src/mesa/main/objectlabel.c | 10 ++++++++-- src/mesa/main/pipelineobj.c | 21 +++++++++++---------- src/mesa/main/pipelineobj.h | 3 +++ 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index ffa7f0cd704..983b9dc307b 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2815,6 +2815,8 @@ struct gl_pipeline_object mtx_t Mutex; + GLchar *Label; /**< GL_KHR_debug */ + /** * Programs used for rendering * diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c index aecb5b1fa51..5626054687b 100644 --- a/src/mesa/main/objectlabel.c +++ b/src/mesa/main/objectlabel.c @@ -30,6 +30,7 @@ #include "enums.h" #include "fbobject.h" #include "objectlabel.h" +#include "pipelineobj.h" #include "queryobj.h" #include "samplerobj.h" #include "shaderobj.h" @@ -214,8 +215,13 @@ get_label_pointer(struct gl_context *ctx, GLenum identifier, GLuint name, } break; case GL_PROGRAM_PIPELINE: - /* requires GL 4.2 */ - goto invalid_enum; + { + struct gl_pipeline_object *pipe = + _mesa_lookup_pipeline_object(ctx, name); + if (pipe) + labelPtr = &pipe->Label; + } + break; default: goto invalid_enum; } diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index b4795ffe46b..279ae2078fe 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -65,6 +65,7 @@ _mesa_delete_pipeline_object(struct gl_context *ctx, _mesa_reference_shader_program(ctx, &obj->ActiveProgram, NULL); mtx_destroy(&obj->Mutex); + free(obj->Label); ralloc_free(obj); } @@ -136,8 +137,8 @@ 
_mesa_free_pipeline_data(struct gl_context *ctx) * a non-existent ID. The spec defines ID 0 as being technically * non-existent. */ -static inline struct gl_pipeline_object * -lookup_pipeline_object(struct gl_context *ctx, GLuint id) +struct gl_pipeline_object * +_mesa_lookup_pipeline_object(struct gl_context *ctx, GLuint id) { if (id == 0) return NULL; @@ -225,7 +226,7 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program) { GET_CURRENT_CONTEXT(ctx); - struct gl_pipeline_object *pipe = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); struct gl_shader_program *shProg = NULL; GLbitfield any_valid_stages; @@ -337,7 +338,7 @@ _mesa_ActiveShaderProgram(GLuint pipeline, GLuint program) { GET_CURRENT_CONTEXT(ctx); struct gl_shader_program *shProg = NULL; - struct gl_pipeline_object *pipe = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); if (program != 0) { shProg = _mesa_lookup_shader_program_err(ctx, program, @@ -399,7 +400,7 @@ _mesa_BindProgramPipeline(GLuint pipeline) */ if (pipeline) { /* non-default pipeline object */ - newObj = lookup_pipeline_object(ctx, pipeline); + newObj = _mesa_lookup_pipeline_object(ctx, pipeline); if (!newObj) { _mesa_error(ctx, GL_INVALID_OPERATION, "glBindProgramPipeline(non-gen name)"); @@ -468,7 +469,7 @@ _mesa_DeleteProgramPipelines(GLsizei n, const GLuint *pipelines) for (i = 0; i < n; i++) { struct gl_pipeline_object *obj = - lookup_pipeline_object(ctx, pipelines[i]); + _mesa_lookup_pipeline_object(ctx, pipelines[i]); if (obj) { assert(obj->Name == pipelines[i]); @@ -568,7 +569,7 @@ _mesa_IsProgramPipeline(GLuint pipeline) { GET_CURRENT_CONTEXT(ctx); - struct gl_pipeline_object *obj = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *obj = _mesa_lookup_pipeline_object(ctx, pipeline); if (obj == NULL) return GL_FALSE; @@ -582,7 +583,7 @@ void GLAPIENTRY 
_mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params) { GET_CURRENT_CONTEXT(ctx); - struct gl_pipeline_object *pipe = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); /* Are geometry shaders available in this context? */ @@ -841,7 +842,7 @@ _mesa_ValidateProgramPipeline(GLuint pipeline) { GET_CURRENT_CONTEXT(ctx); - struct gl_pipeline_object *pipe = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); if (!pipe) { _mesa_error(ctx, GL_INVALID_OPERATION, @@ -859,7 +860,7 @@ _mesa_GetProgramPipelineInfoLog(GLuint pipeline, GLsizei bufSize, { GET_CURRENT_CONTEXT(ctx); - struct gl_pipeline_object *pipe = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); if (!pipe) { _mesa_error(ctx, GL_INVALID_VALUE, diff --git a/src/mesa/main/pipelineobj.h b/src/mesa/main/pipelineobj.h index b57bcb99e5c..6dee775ab5e 100644 --- a/src/mesa/main/pipelineobj.h +++ b/src/mesa/main/pipelineobj.h @@ -45,6 +45,9 @@ _mesa_init_pipeline(struct gl_context *ctx); extern void _mesa_free_pipeline_data(struct gl_context *ctx); +extern struct gl_pipeline_object * +_mesa_lookup_pipeline_object(struct gl_context *ctx, GLuint id); + extern void _mesa_reference_pipeline_object_(struct gl_context *ctx, struct gl_pipeline_object **ptr, From 2310a65c28f809442c24fc8893c65ce7c7a4dca3 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 16 Jun 2015 14:27:15 -0700 Subject: [PATCH 707/834] i965/compute: Fix undefined code with right_mask for SIMD32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although we don't support SIMD32, krh pointed out that the left shift by 32 is undefined by C/C++ for 32-bit integers. 
Suggested-by: Kristian Høgsberg Signed-off-by: Jordan Justen Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_compute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index b3d6de51adc..5693ab507d4 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -45,7 +45,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups) unsigned thread_width_max = (group_size + simd_size - 1) / simd_size; - uint32_t right_mask = (1u << simd_size) - 1; + uint32_t right_mask = 0xffffffffu >> (32 - simd_size); const unsigned right_non_aligned = group_size & (simd_size - 1); if (right_non_aligned != 0) right_mask >>= (simd_size - right_non_aligned); From 22af95af8316f2888a3935cdf774ff0997b3dd42 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 18 Jun 2015 16:14:50 -0700 Subject: [PATCH 708/834] i965: Add missing braces around if-statement. Fixes a performance problem caused by commit b639ed2f. 
Reviewed-by: Chris Forbes Reviewed-by: Jason Ekstrand Reviewed-by: Ben Widawsky Reviewed-by: Kenneth Graunke Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90895 --- src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index c0c8dfa608d..49f2e3e498c 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -339,12 +339,13 @@ is_color_fast_clear_compatible(struct brw_context *brw, mesa_format format, const union gl_color_union *color) { - if (_mesa_is_format_integer_color(format)) + if (_mesa_is_format_integer_color(format)) { if (brw->gen >= 8) { perf_debug("Integer fast clear not enabled for (%s)", _mesa_get_format_name(format)); } return false; + } for (int i = 0; i < 4; i++) { if (color->f[i] != 0.0 && color->f[i] != 1.0 && From 5974841fd0be7e2c336f63bd9ef416723e1923dc Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Thu, 18 Jun 2015 19:08:24 -0400 Subject: [PATCH 709/834] glsl: guard gl_NumSamples enablement on ARB_sample_shading gl_NumSamples should only be enabled when ARB_sample_shading is enabled. 
Signed-off-by: Ilia Mirkin Reviewed-by: Anuj Phogat --- src/glsl/builtin_variables.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp index c52b2526797..a765d35fde0 100644 --- a/src/glsl/builtin_variables.cpp +++ b/src/glsl/builtin_variables.cpp @@ -764,7 +764,8 @@ builtin_variable_generator::generate_constants() void builtin_variable_generator::generate_uniforms() { - add_uniform(int_t, "gl_NumSamples"); + if (state->is_version(400, 0) || state->ARB_sample_shading_enable) + add_uniform(int_t, "gl_NumSamples"); add_uniform(type("gl_DepthRangeParameters"), "gl_DepthRange"); add_uniform(array(vec4_t, VERT_ATTRIB_MAX), "gl_CurrentAttribVertMESA"); add_uniform(array(vec4_t, VARYING_SLOT_MAX), "gl_CurrentAttribFragMESA"); From f734d2556013e9239e91f43b563b5b1d8f03ada4 Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Mon, 15 Jun 2015 18:29:02 +0100 Subject: [PATCH 710/834] glsl: Fail linkage when UBO exceeds GL_MAX_UNIFORM_BLOCK_SIZE. It's not totally clear whether other Mesa drivers can safely cope with over-sized UBOs, but at least for llvmpipe receiving a UBO larger than its limit causes problems, as it won't fit into its internal display lists. This fixes piglit "arb_uniform_buffer_object-maxuniformblocksize fsexceed" without regressions for llvmpipe. NVIDIA driver also fails to link the shader from "arb_uniform_buffer_object-maxuniformblocksize fsexceed". Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=65525 PS: I don't recommend cherry-picking this for Mesa stable, as some app might inadvertently have been relying on UBOs larger than GL_MAX_UNIFORM_BLOCK_SIZE to work on other drivers, so even if this commit is universally accepted it's probably best to let it mature in master for a while.
Reviewed-by: Roland Scheidegger --- src/glsl/linker.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 99783800b7f..4a726d4e2e7 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2355,6 +2355,13 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog) unsigned total_uniform_blocks = 0; for (unsigned i = 0; i < prog->NumUniformBlocks; i++) { + if (prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) { + linker_error(prog, "Uniform block %s too big (%d/%d)\n", + prog->UniformBlocks[i].Name, + prog->UniformBlocks[i].UniformBufferSize, + ctx->Const.MaxUniformBlockSize); + } + for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { if (prog->UniformBlockStageIndex[j][i] != -1) { blocks[j]++; From afeb92220690c8f27cdc56c30e109ca175d51d83 Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Thu, 18 Jun 2015 15:47:00 +0100 Subject: [PATCH 711/834] llvmpipe: Truncate the binned constants to max const buffer size. Tested with Ilia Mirkin's gzdoom.trace and "arb_uniform_buffer_object-maxuniformblocksize fsexceed" piglit test without my earlier fix to fail linkage when UBO exceeds GL_MAX_UNIFORM_BLOCK_SIZE. Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_limits.h | 6 +++++- src/gallium/drivers/llvmpipe/lp_setup.c | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h index 49064feddef..db503514881 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -51,8 +51,12 @@ #define LP_MAX_TGSI_PREDS 16 +#define LP_MAX_TGSI_CONSTS 4096 + #define LP_MAX_TGSI_CONST_BUFFERS 16 +#define LP_MAX_TGSI_CONST_BUFFER_SIZE (LP_MAX_TGSI_CONSTS * sizeof(float[4])) + /* * For quick access we cache registers in statically * allocated arrays. 
Here we define the maximum size @@ -100,7 +104,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_MAX_OUTPUTS: return 32; case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: - return sizeof(float[4]) * 4096; + return LP_MAX_TGSI_CONST_BUFFER_SIZE; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return PIPE_MAX_CONSTANT_BUFFERS; case PIPE_SHADER_CAP_MAX_TEMPS: diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 56292c68c5f..4c8167a9e7d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -1069,10 +1069,13 @@ try_update_scene_state( struct lp_setup_context *setup ) if (setup->dirty & LP_SETUP_NEW_CONSTANTS) { for (i = 0; i < Elements(setup->constants); ++i) { struct pipe_resource *buffer = setup->constants[i].current.buffer; - const unsigned current_size = setup->constants[i].current.buffer_size; + const unsigned current_size = MIN2(setup->constants[i].current.buffer_size, + LP_MAX_TGSI_CONST_BUFFER_SIZE); const ubyte *current_data = NULL; int num_constants; + STATIC_ASSERT(DATA_BLOCK_SIZE >= LP_MAX_TGSI_CONST_BUFFER_SIZE); + if (buffer) { /* resource buffer */ current_data = (ubyte *) llvmpipe_resource_data(buffer); From 2ce2b80c6fd7ba5effbdf86ca3affe10a9c70492 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 19 Jun 2015 13:03:36 +1000 Subject: [PATCH 712/834] docs: update developer info Update piglit link to the current Piglit website. Add note about updating patchwork when sending patch revisions. Acked-by: Matt Turner --- docs/devinfo.html | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/devinfo.html b/docs/devinfo.html index 0da18b9b709..8ebf80f40e8 100644 --- a/docs/devinfo.html +++ b/docs/devinfo.html @@ -244,7 +244,7 @@ to update the tests themselves.

        Whenever possible and applicable, test the patch with -Piglit to +Piglit to check for regressions.

        @@ -266,6 +266,12 @@ re-sending the whole series). Using --in-reply-to makes it harder for reviewers to accidentally review old patches.

        +

        +When submitting follow-up patches you should also login to +patchwork and change the +state of your old patches to Superseded. +

        +

        Reviewing Patches

        From 12c1c0706d4356819cfbaa15c3d71402a42e3539 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 13 Jun 2015 07:58:53 -0600 Subject: [PATCH 713/834] tgsi: new comments, assertion for executing TGSI_OPCODE_CAL --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index a098a82be63..fde99b9e494 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -4401,8 +4401,12 @@ exec_instruction( mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - /* Finally, jump to the subroutine */ + /* Finally, jump to the subroutine. The label is a pointer + * (an instruction number) to the BGNSUB instruction. + */ *pc = inst->Label.Label; + assert(mach->Instructions[*pc].Instruction.Opcode + == TGSI_OPCODE_BGNSUB); } break; From 5ad5d44af57a815c6eb16d4d61070135acb55f37 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 13 Jun 2015 08:07:08 -0600 Subject: [PATCH 714/834] tgsi: add comments for ureg_emit_label() --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 0eaf1dfa7ae..201a849ef95 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -1069,6 +1069,12 @@ ureg_emit_insn(struct ureg_program *ureg, } +/** + * Emit a label token. + * \param label_token returns a token number indicating where the label + * needs to be patched later. Later, this value should be passed to the + * ureg_fixup_label() function. 
+ */ void ureg_emit_label(struct ureg_program *ureg, unsigned extended_token, From b8249de646e75f8af0c84d7f06b7805fe555e13e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 16 Jun 2015 15:32:45 -0600 Subject: [PATCH 715/834] st/wgl: fix WGL_SAMPLE_BUFFERS_ARB query Only report 1 for WGL_SAMPLE_BUFFERS_ARB if the number of samples per pixel > 1. Reviewed-by: Matthew McClure --- src/gallium/state_trackers/wgl/stw_ext_pixelformat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c b/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c index 91682d115a4..9f466ba1735 100644 --- a/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c +++ b/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c @@ -232,7 +232,7 @@ stw_query_attrib( break; case WGL_SAMPLE_BUFFERS_ARB: - *pvalue = 1; + *pvalue = (pfi->stvis.samples > 1); break; case WGL_SAMPLES_ARB: From 0925e5f5bc843237e534313dd5b99095ecbdd987 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 16 Jun 2015 15:32:45 -0600 Subject: [PATCH 716/834] st/wgl: respect sample count when creating framebuffer surfaces Use the visual/pixel format's sample count instead of zero. 
Reviewed-by: Matthew McClure --- src/gallium/state_trackers/wgl/stw_st.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/state_trackers/wgl/stw_st.c b/src/gallium/state_trackers/wgl/stw_st.c index e95c37fcecd..6325ab1ce29 100644 --- a/src/gallium/state_trackers/wgl/stw_st.c +++ b/src/gallium/state_trackers/wgl/stw_st.c @@ -77,6 +77,7 @@ stw_st_framebuffer_validate_locked(struct st_framebuffer_iface *stfb, templ.depth0 = 1; templ.array_size = 1; templ.last_level = 0; + templ.nr_samples = stwfb->stvis.samples; for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { enum pipe_format format; From 9405c1b3b0b207409931166a608276198a068cb8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 16 Jun 2015 15:32:45 -0600 Subject: [PATCH 717/834] st/wgl: add support for multisample pixel formats Create pixel formats with 0, 4, 8 and 16 samples per pixel. Add a SVGA_FORCE_MSAA env var to force creating all pixel formats with a particular sample count. This is useful for testing Mesa/GLUT/ etc. programs which don't ordinarily use multisample. Reviewed-by: Matthew McClure --- .../state_trackers/wgl/stw_pixelformat.c | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/gallium/state_trackers/wgl/stw_pixelformat.c b/src/gallium/state_trackers/wgl/stw_pixelformat.c index b0cd5abd27a..db6cf8ee30f 100644 --- a/src/gallium/state_trackers/wgl/stw_pixelformat.c +++ b/src/gallium/state_trackers/wgl/stw_pixelformat.c @@ -113,7 +113,9 @@ stw_pf_doublebuffer[] = { const unsigned stw_pf_multisample[] = { 0, - 4 + 4, + 8, + 16 }; @@ -222,23 +224,32 @@ add_color_format_variants(const struct stw_pf_color_info *color, unsigned ms, db, ds, acc; unsigned bind_flags = PIPE_BIND_RENDER_TARGET; unsigned num_added = 0; + int force_samples = 0; + + /* Since GLUT for Windows doesn't support MSAA we have an env var + * to force all pixel formats to have a particular number of samples. 
+ */ + { + const char *samples= getenv("SVGA_FORCE_MSAA"); + if (samples) + force_samples = atoi(samples); + } if (!extended) { bind_flags |= PIPE_BIND_DISPLAY_TARGET; } - if (!screen->is_format_supported(screen, color->format, - PIPE_TEXTURE_2D, 0, bind_flags)) { - return 0; - } - for (ms = 0; ms < Elements(stw_pf_multisample); ms++) { unsigned samples = stw_pf_multisample[ms]; - /* FIXME: re-enabled MSAA when we can query it */ - if (samples) + if (force_samples && samples != force_samples) continue; + if (!screen->is_format_supported(screen, color->format, + PIPE_TEXTURE_2D, samples, bind_flags)) { + continue; + } + for (db = 0; db < Elements(stw_pf_doublebuffer); db++) { unsigned doublebuffer = stw_pf_doublebuffer[db]; @@ -246,7 +257,7 @@ add_color_format_variants(const struct stw_pf_color_info *color, const struct stw_pf_depth_info *depth = &stw_pf_depth_stencil[ds]; if (!screen->is_format_supported(screen, depth->format, - PIPE_TEXTURE_2D, 0, + PIPE_TEXTURE_2D, samples, PIPE_BIND_DEPTH_STENCIL)) { continue; } From 528bd94432b20becc1f436da75f7a102416dabeb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 16 Jun 2015 15:32:46 -0600 Subject: [PATCH 718/834] st/wgl: set PIPE_BIND_SAMPLER_VIEW for window color buffers To allow sampling from the surface for things like glCopyPixels or glCopyTexSubImage. 
Reviewed-by: Charmaine Lee --- src/gallium/state_trackers/wgl/stw_st.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/state_trackers/wgl/stw_st.c b/src/gallium/state_trackers/wgl/stw_st.c index 6325ab1ce29..0a9116cbb73 100644 --- a/src/gallium/state_trackers/wgl/stw_st.c +++ b/src/gallium/state_trackers/wgl/stw_st.c @@ -96,6 +96,7 @@ stw_st_framebuffer_validate_locked(struct st_framebuffer_iface *stfb, case ST_ATTACHMENT_BACK_LEFT: format = stwfb->stvis.color_format; bind = PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; break; case ST_ATTACHMENT_DEPTH_STENCIL: From eee9247018d710659f14678715a85e4ad6f54366 Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Tue, 16 Jun 2015 15:32:46 -0600 Subject: [PATCH 719/834] st/wgl: Don't return core profile for 3.1 contexts. WGL_CONTEXT_PROFILE_MASK_ARB doesn't apply to desktop OpenGL versions less than 3.2 -- applications can't specify whether they want a core or a compat 3.1 context -- instead they are supposed to check whether the returned context advertises the GL_ARB_compatibility extension. Mesa doesn't support compatibility contexts for version higher than 3.1, so we used to return core profile context, but this makes several Windows applications unhappy, because they just assume they got a compatibility context without checking. So it seems safer on Windows to never return core profile for 3.1, i.e., just fail the context creation. VMware PR1365920. Reviewed-by: Brian Paul --- src/gallium/state_trackers/wgl/stw_context.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/gallium/state_trackers/wgl/stw_context.c b/src/gallium/state_trackers/wgl/stw_context.c index 2ed6c2bfac9..3e99cc44db7 100644 --- a/src/gallium/state_trackers/wgl/stw_context.c +++ b/src/gallium/state_trackers/wgl/stw_context.c @@ -226,14 +226,13 @@ stw_create_context_attribs(HDC hdc, INT iLayerPlane, DHGLRC hShareContext, * be implemented, as determined by the implementation. 
* * The core profile of version 3.2 or greater." * - * and because Mesa doesn't support GL_ARB_compatibility, the only chance to - * honour a 3.1 context is through core profile. + * But Mesa doesn't support GL_ARB_compatibility, while most prevalent + * Windows OpenGL implementations do, and unfortunately many Windows + * applications don't check whether they receive or not a context with + * GL_ARB_compatibility, so returning a core profile here does more harm + * than good. */ - if (majorVersion == 3 && minorVersion == 1) { - attribs.profile = ST_PROFILE_OPENGL_CORE; - } else { - attribs.profile = ST_PROFILE_DEFAULT; - } + attribs.profile = ST_PROFILE_DEFAULT; break; case WGL_CONTEXT_ES_PROFILE_BIT_EXT: if (majorVersion >= 2) { From 8d005a643ed94c1871ec854bc069366cdda6581f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 16 Jun 2015 15:32:46 -0600 Subject: [PATCH 720/834] stw: add some no-op functions for GL_EXT_dsa, GL_NV_half_float MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Viewperf 12 calls wglGetProcAddress() to get pointers to some unsupported DSA and half-float functions. We return NULL but Viewperf doesn't check for null before trying to jump through the pointer. That causes a crash. This patch adds no-op functions to call instead (used by the next patch). This avoids the crash but the rendering is incorrect. Some DSA functions are being added to Mesa at this time so we may be able to remove some of these no-ops in the future. More no-op functions may be added as needed. 
VMware PR1383421 Reviewed-by: José Fonseca --- .../state_trackers/wgl/Makefile.sources | 1 + src/gallium/state_trackers/wgl/stw_nopfuncs.c | 464 ++++++++++++++++++ src/gallium/state_trackers/wgl/stw_nopfuncs.h | 11 + 3 files changed, 476 insertions(+) create mode 100644 src/gallium/state_trackers/wgl/stw_nopfuncs.c create mode 100644 src/gallium/state_trackers/wgl/stw_nopfuncs.h diff --git a/src/gallium/state_trackers/wgl/Makefile.sources b/src/gallium/state_trackers/wgl/Makefile.sources index d204efd429a..8c463d5f18e 100644 --- a/src/gallium/state_trackers/wgl/Makefile.sources +++ b/src/gallium/state_trackers/wgl/Makefile.sources @@ -8,6 +8,7 @@ C_SOURCES := \ stw_ext_swapinterval.c \ stw_framebuffer.c \ stw_getprocaddress.c \ + stw_nopfuncs.c \ stw_pixelformat.c \ stw_st.c \ stw_tls.c \ diff --git a/src/gallium/state_trackers/wgl/stw_nopfuncs.c b/src/gallium/state_trackers/wgl/stw_nopfuncs.c new file mode 100644 index 00000000000..d69c0134f37 --- /dev/null +++ b/src/gallium/state_trackers/wgl/stw_nopfuncs.c @@ -0,0 +1,464 @@ +/************************************************************************** + * + * Copyright 2015 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * No-op GL API functions. + * + * Some OpenGL apps (like Viewperf12) call wglGetProcAddress() to get + * a pointer to an extension function, get a NULL pointer, but don't bother + * to check for NULL before jumping through the pointer. This causes a + * crash. + * + * As a work-around we provide some no-op functions here to avoid those + * crashes. + */ + +#include +#include "stw_nopfuncs.h" +#include "util/u_debug.h" + + +static void +warning(const char *name) +{ + /* use name+4 to skip "nop_" prefix */ + _debug_printf("Application calling unsupported %s function\n", name+4); +} + +static void APIENTRY +nop_glBindMultiTextureEXT(GLenum texunit, GLenum target, GLuint texture) +{ + warning(__func__); +} + +static void APIENTRY +nop_glColor3hNV(GLhalfNV red, GLhalfNV green, GLhalfNV blue) +{ + warning(__func__); +} + +static void APIENTRY +nop_glColor3hvNV(const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glColor4hNV(GLhalfNV red, GLhalfNV green, GLhalfNV blue, GLhalfNV alpha) +{ + warning(__func__); +} + +static void APIENTRY +nop_glColor4hvNV(const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glDisableClientStateIndexedEXT(GLenum array, GLuint index) +{ + warning(__func__); +} + +static void APIENTRY +nop_glEnableClientStateIndexedEXT(GLenum array, GLuint index) +{ + warning(__func__); +} + +static void APIENTRY +nop_glFogCoordhNV(GLhalfNV fog) +{ 
+ warning(__func__); +} + +static void APIENTRY +nop_glFogCoordhvNV(const GLhalfNV *fog) +{ + warning(__func__); +} + +static void APIENTRY +nop_glGetNamedBufferParameterivEXT(GLuint buffer, GLenum pname, GLint *params) +{ + warning(__func__); +} + +static void APIENTRY +nop_glGetNamedBufferSubDataEXT(GLuint buffer, GLintptr offset, GLsizeiptr size, void *data) +{ + warning(__func__); +} + +static void *APIENTRY +nop_glMapNamedBufferEXT(GLuint buffer, GLenum access) +{ + warning(__func__); + return NULL; +} + +static void APIENTRY +nop_glMatrixLoadfEXT(GLenum mode, const GLfloat *m) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMatrixLoadIdentityEXT(GLenum mode) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexCoord1hNV(GLenum target, GLhalfNV s) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexCoord1hvNV(GLenum target, const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexCoord2hNV(GLenum target, GLhalfNV s, GLhalfNV t) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexCoord2hvNV(GLenum target, const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexCoord3hNV(GLenum target, GLhalfNV s, GLhalfNV t, GLhalfNV r) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexCoord3hvNV(GLenum target, const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexCoord4hNV(GLenum target, GLhalfNV s, GLhalfNV t, GLhalfNV r, GLhalfNV q) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexCoord4hvNV(GLenum target, const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexCoordPointerEXT(GLenum texunit, GLint size, GLenum type, GLsizei stride, const void *pointer) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexEnvfEXT(GLenum texunit, GLenum target, GLenum pname, GLfloat param) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexEnvfvEXT(GLenum texunit, 
GLenum target, GLenum pname, const GLfloat *params) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexEnviEXT(GLenum texunit, GLenum target, GLenum pname, GLint param) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexGenfvEXT(GLenum texunit, GLenum coord, GLenum pname, const GLfloat *params) +{ + warning(__func__); +} + +static void APIENTRY +nop_glMultiTexGeniEXT(GLenum texunit, GLenum coord, GLenum pname, GLint param) +{ + warning(__func__); +} + +static void APIENTRY +nop_glNamedBufferDataEXT(GLuint buffer, GLsizeiptr size, const void *data, GLenum usage) +{ + warning(__func__); +} + +static void APIENTRY +nop_glNamedBufferSubDataEXT(GLuint buffer, GLintptr offset, GLsizeiptr size, const void *data) +{ + warning(__func__); +} + +static void APIENTRY +nop_glNamedProgramLocalParameter4fvEXT(GLuint program, GLenum target, GLuint index, const GLfloat *params) +{ + warning(__func__); +} + +static void APIENTRY +nop_glNamedProgramLocalParameters4fvEXT(GLuint program, GLenum target, GLuint index, GLsizei count, const GLfloat *params) +{ + warning(__func__); +} + +static void APIENTRY +nop_glNormal3hNV(GLhalfNV nx, GLhalfNV ny, GLhalfNV nz) +{ + warning(__func__); +} + +static void APIENTRY +nop_glNormal3hvNV(const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glPatchParameterfv(GLenum pname, const GLfloat *values) +{ + warning(__func__); +} + +static void APIENTRY +nop_glPatchParameteri(GLenum pname, GLint value) +{ + warning(__func__); +} + +static void APIENTRY +nop_glSecondaryColor3hNV(GLhalfNV red, GLhalfNV green, GLhalfNV blue) +{ + warning(__func__); +} + +static void APIENTRY +nop_glSecondaryColor3hvNV(const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glTexCoord1hNV(GLhalfNV s) +{ + warning(__func__); +} + +static void APIENTRY +nop_glTexCoord1hvNV(const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glTexCoord2hNV(GLhalfNV s, GLhalfNV t) +{ + 
warning(__func__); +} + +static void APIENTRY +nop_glTexCoord2hvNV(const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glTexCoord3hNV(GLhalfNV s, GLhalfNV t, GLhalfNV r) +{ + warning(__func__); +} + +static void APIENTRY +nop_glTexCoord3hvNV(const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glTexCoord4hNV(GLhalfNV s, GLhalfNV t, GLhalfNV r, GLhalfNV q) +{ + warning(__func__); +} + +static void APIENTRY +nop_glTexCoord4hvNV(const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glTextureParameterfEXT(GLuint texture, GLenum target, GLenum pname, GLfloat param) +{ + warning(__func__); +} + +static void APIENTRY +nop_glTextureParameterfvEXT(GLuint texture, GLenum target, GLenum pname, const GLfloat *params) +{ + warning(__func__); +} + +static void APIENTRY +nop_glTextureParameteriEXT(GLuint texture, GLenum target, GLenum pname, GLint param) +{ + warning(__func__); +} + +static GLboolean APIENTRY +nop_glUnmapNamedBufferEXT(GLuint buffer) +{ + warning(__func__); + return GL_FALSE; +} + +static void APIENTRY +nop_glVertex2hNV(GLhalfNV x, GLhalfNV y) +{ + warning(__func__); +} + +static void APIENTRY +nop_glVertex2hvNV(const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glVertex3hNV(GLhalfNV x, GLhalfNV y, GLhalfNV z) +{ + warning(__func__); +} + +static void APIENTRY +nop_glVertex3hvNV(const GLhalfNV *v) +{ + warning(__func__); +} + +static void APIENTRY +nop_glVertex4hNV(GLhalfNV x, GLhalfNV y, GLhalfNV z, GLhalfNV w) +{ + warning(__func__); +} + +static void APIENTRY +nop_glVertex4hvNV(const GLhalfNV *v) +{ + warning(__func__); +} + + +PROC +stw_get_nop_function(const char *name) +{ + struct { + const char *name; + PROC p; + } table[] = { + { "glBindMultiTextureEXT", (PROC) nop_glBindMultiTextureEXT }, + { "glColor3hNV", (PROC) nop_glColor3hNV }, + { "glColor3hvNV", (PROC) nop_glColor3hvNV }, + { "glColor4hNV", (PROC) nop_glColor4hNV }, + { "glColor4hvNV", (PROC) nop_glColor4hvNV 
}, + { "glDisableClientStateIndexedEXT", (PROC) nop_glDisableClientStateIndexedEXT }, + { "glEnableClientStateIndexedEXT", (PROC) nop_glEnableClientStateIndexedEXT }, + { "glFogCoordhNV", (PROC) nop_glFogCoordhNV }, + { "glFogCoordhvNV", (PROC) nop_glFogCoordhvNV }, + { "glGetNamedBufferParameterivEXT", (PROC) nop_glGetNamedBufferParameterivEXT }, + { "glGetNamedBufferSubDataEXT", (PROC) nop_glGetNamedBufferSubDataEXT }, + { "glMapNamedBufferEXT", (PROC) nop_glMapNamedBufferEXT }, + { "glMatrixLoadfEXT", (PROC) nop_glMatrixLoadfEXT }, + { "glMatrixLoadIdentityEXT", (PROC) nop_glMatrixLoadIdentityEXT }, + { "glMultiTexCoord1hNV", (PROC) nop_glMultiTexCoord1hNV }, + { "glMultiTexCoord1hvNV", (PROC) nop_glMultiTexCoord1hvNV }, + { "glMultiTexCoord2hNV", (PROC) nop_glMultiTexCoord2hNV }, + { "glMultiTexCoord2hvNV", (PROC) nop_glMultiTexCoord2hvNV }, + { "glMultiTexCoord3hNV", (PROC) nop_glMultiTexCoord3hNV }, + { "glMultiTexCoord3hvNV", (PROC) nop_glMultiTexCoord3hvNV }, + { "glMultiTexCoord4hNV", (PROC) nop_glMultiTexCoord4hNV }, + { "glMultiTexCoord4hvNV", (PROC) nop_glMultiTexCoord4hvNV }, + { "glMultiTexCoordPointerEXT", (PROC) nop_glMultiTexCoordPointerEXT }, + { "glMultiTexEnvfEXT", (PROC) nop_glMultiTexEnvfEXT }, + { "glMultiTexEnvfvEXT", (PROC) nop_glMultiTexEnvfvEXT }, + { "glMultiTexEnviEXT", (PROC) nop_glMultiTexEnviEXT }, + { "glMultiTexGenfvEXT", (PROC) nop_glMultiTexGenfvEXT }, + { "glMultiTexGeniEXT", (PROC) nop_glMultiTexGeniEXT }, + { "glNamedBufferDataEXT", (PROC) nop_glNamedBufferDataEXT }, + { "glNamedBufferSubDataEXT", (PROC) nop_glNamedBufferSubDataEXT }, + { "glNamedProgramLocalParameter4fvEXT", (PROC) nop_glNamedProgramLocalParameter4fvEXT }, + { "glNamedProgramLocalParameters4fvEXT", (PROC) nop_glNamedProgramLocalParameters4fvEXT }, + { "glNormal3hNV", (PROC) nop_glNormal3hNV }, + { "glNormal3hvNV", (PROC) nop_glNormal3hvNV }, + { "glPatchParameterfv", (PROC) nop_glPatchParameterfv }, + { "glPatchParameteri", (PROC) nop_glPatchParameteri }, + { 
"glSecondaryColor3hNV", (PROC) nop_glSecondaryColor3hNV }, + { "glSecondaryColor3hvNV", (PROC) nop_glSecondaryColor3hvNV }, + { "glTexCoord1hNV", (PROC) nop_glTexCoord1hNV }, + { "glTexCoord1hvNV", (PROC) nop_glTexCoord1hvNV }, + { "glTexCoord2hNV", (PROC) nop_glTexCoord2hNV }, + { "glTexCoord2hvNV", (PROC) nop_glTexCoord2hvNV }, + { "glTexCoord3hNV", (PROC) nop_glTexCoord3hNV }, + { "glTexCoord3hvNV", (PROC) nop_glTexCoord3hvNV }, + { "glTexCoord4hNV", (PROC) nop_glTexCoord4hNV }, + { "glTexCoord4hvNV", (PROC) nop_glTexCoord4hvNV }, + { "glTextureParameterfEXT", (PROC) nop_glTextureParameterfEXT }, + { "glTextureParameterfvEXT", (PROC) nop_glTextureParameterfvEXT }, + { "glTextureParameteriEXT", (PROC) nop_glTextureParameteriEXT }, + { "glUnmapNamedBufferEXT", (PROC) nop_glUnmapNamedBufferEXT }, + { "glVertex2hNV", (PROC) nop_glVertex2hNV }, + { "glVertex2hvNV", (PROC) nop_glVertex2hvNV }, + { "glVertex3hNV", (PROC) nop_glVertex3hNV }, + { "glVertex3hvNV", (PROC) nop_glVertex3hvNV }, + { "glVertex4hNV", (PROC) nop_glVertex4hNV }, + { "glVertex4hvNV", (PROC) nop_glVertex4hvNV }, + { NULL, NULL } + }; + + int i; + + for (i = 0; table[i].name; i++) { + if (strcmp(table[i].name, name) == 0) + return table[i].p; + } + return NULL; +} diff --git a/src/gallium/state_trackers/wgl/stw_nopfuncs.h b/src/gallium/state_trackers/wgl/stw_nopfuncs.h new file mode 100644 index 00000000000..f00d420accf --- /dev/null +++ b/src/gallium/state_trackers/wgl/stw_nopfuncs.h @@ -0,0 +1,11 @@ + + +#ifndef STW_NOPFUNCS_H +#define STW_NOPFUNCS_H + + +PROC +stw_get_nop_function(const char *name); + + +#endif /* STW_NOPFUNCS_H */ From 73bdf4ba86751983dff011ac488ac60321d70a7f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 16 Jun 2015 15:32:46 -0600 Subject: [PATCH 721/834] stw: use new stw_get_nop_function() function to avoid Viewperf 12 crashes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also, print a warning if we do return NULL from 
wglGetProcAddress() to help spot this sort of problem in the future. Reviewed-by: José Fonseca --- .../state_trackers/wgl/stw_getprocaddress.c | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/wgl/stw_getprocaddress.c b/src/gallium/state_trackers/wgl/stw_getprocaddress.c index 2ffeec1a2fb..33949b6606f 100644 --- a/src/gallium/state_trackers/wgl/stw_getprocaddress.c +++ b/src/gallium/state_trackers/wgl/stw_getprocaddress.c @@ -35,6 +35,7 @@ #include "glapi/glapi.h" #include "stw_device.h" #include "stw_icd.h" +#include "stw_nopfuncs.h" struct stw_extension_entry { @@ -79,6 +80,7 @@ DrvGetProcAddress( LPCSTR lpszProc ) { const struct stw_extension_entry *entry; + PROC p; if (!stw_dev) return NULL; @@ -88,8 +90,23 @@ DrvGetProcAddress( if (strcmp( lpszProc, entry->name ) == 0) return entry->proc; - if (lpszProc[0] == 'g' && lpszProc[1] == 'l') - return (PROC) _glapi_get_proc_address( lpszProc ); + if (lpszProc[0] == 'g' && lpszProc[1] == 'l') { + p = (PROC) _glapi_get_proc_address(lpszProc); + if (p) + return p; + } + /* If we get here, we'd normally just return NULL, but since some apps + * (like Viewperf12) crash when they try to use the null pointer, try + * returning a pointer to a no-op function instead. + */ + p = stw_get_nop_function(lpszProc); + if (p) { + debug_printf("wglGetProcAddress(\"%s\") returning no-op function\n", + lpszProc); + return p; + } + + debug_printf("wglGetProcAddress(\"%s\") returning NULL\n", lpszProc); return NULL; } From 4c11008eba9f58621bbbae430f8717176045b0ce Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 16 Jun 2015 15:32:46 -0600 Subject: [PATCH 722/834] st/wgl: fix WGL_SWAP_METHOD_ARB query There are three possible return values (not two): WGL_SWAP_COPY_ARB, WGL_SWAP_EXCHANGE_EXT and WGL_SWAP_UNDEFINED_ARB. 
VMware bug 1431184 Reviewed-by: Jose Fonseca Reviewed-by: Charmaine Lee --- src/gallium/state_trackers/wgl/stw_ext_pixelformat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c b/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c index 9f466ba1735..e38086e86d7 100644 --- a/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c +++ b/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c @@ -88,7 +88,12 @@ stw_query_attrib( return TRUE; case WGL_SWAP_METHOD_ARB: - *pvalue = pfi->pfd.dwFlags & PFD_SWAP_COPY ? WGL_SWAP_COPY_ARB : WGL_SWAP_UNDEFINED_ARB; + if (pfi->pfd.dwFlags & PFD_SWAP_COPY) + *pvalue = WGL_SWAP_COPY_ARB; + else if (pfi->pfd.dwFlags & PFD_SWAP_EXCHANGE) + *pvalue = WGL_SWAP_EXCHANGE_EXT; + else + *pvalue = WGL_SWAP_UNDEFINED_ARB; return TRUE; case WGL_SWAP_LAYER_BUFFERS_ARB: From c40f44cc991d9499243063cba95fbdc947c53371 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 18 Jun 2015 17:53:42 -0600 Subject: [PATCH 723/834] gallium: whitespace, formatting clean-up in p_state.h Remove trailing whitespace, move some braces, 78-column wrapping. Trivial. --- src/gallium/include/pipe/p_state.h | 36 +++++++++++++++++------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index e01c62c09a4..a18f12e8a87 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2007 VMware, Inc. * All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,13 +22,13 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ /** * @file - * + * * Abstract graphics pipe state objects. * * Basic notes: @@ -217,7 +217,7 @@ struct pipe_shader_state }; -struct pipe_depth_state +struct pipe_depth_state { unsigned enabled:1; /**< depth test enabled? */ unsigned writemask:1; /**< allow depth buffer writes? 
*/ @@ -268,6 +268,7 @@ struct pipe_rt_blend_state unsigned colormask:4; /**< bitmask of PIPE_MASK_R/G/B/A */ }; + struct pipe_blend_state { unsigned independent_blend_enable:1; @@ -285,11 +286,13 @@ struct pipe_blend_color float color[4]; }; + struct pipe_stencil_ref { ubyte ref_value[2]; }; + struct pipe_framebuffer_state { unsigned width, height; @@ -367,10 +370,10 @@ struct pipe_sampler_view struct pipe_context *context; /**< context this view belongs to */ union { struct { - unsigned first_layer:16; /**< first layer to use for array textures */ - unsigned last_layer:16; /**< last layer to use for array textures */ - unsigned first_level:8; /**< first mipmap level to use */ - unsigned last_level:8; /**< last mipmap level to use */ + unsigned first_layer:16; /**< first layer to use for array textures */ + unsigned last_layer:16; /**< last layer to use for array textures */ + unsigned first_level:8; /**< first mipmap level to use */ + unsigned last_level:8; /**< last mipmap level to use */ } tex; struct { unsigned first_element; @@ -455,7 +458,8 @@ struct pipe_vertex_buffer * A constant buffer. A subrange of an existing buffer can be set * as a constant buffer. */ -struct pipe_constant_buffer { +struct pipe_constant_buffer +{ struct pipe_resource *buffer; /**< the actual buffer */ unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */ unsigned buffer_size; /**< how much data can be read in shader */ @@ -474,8 +478,8 @@ struct pipe_constant_buffer { * and the CPU actually doesn't have to query it. * * Note that the buffer_size variable is actually specifying the available - * space in the buffer, not the size of the attached buffer. - * In other words in majority of cases buffer_size would simply be + * space in the buffer, not the size of the attached buffer. 
+ * In other words in majority of cases buffer_size would simply be * 'buffer->width0 - buffer_offset', so buffer_size refers to the size * of the buffer left, after accounting for buffer offset, for stream output * to write to. @@ -511,7 +515,7 @@ struct pipe_vertex_element * this attribute live in? */ unsigned vertex_buffer_index; - + enum pipe_format src_format; }; @@ -642,5 +646,5 @@ struct pipe_compute_state #ifdef __cplusplus } #endif - + #endif From 6ec4e9c28d54877fbaca04b080c249048c6e7634 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 18 Jun 2015 18:03:29 -0600 Subject: [PATCH 724/834] u_vbuf: fix src_offset alignment in u_vbuf_create_vertex_elements() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the driver says PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY=1, the driver should never receive a pipe_vertex_element::src_offset value that's not a multiple of four. But the vbuf code wasn't actually adjusting the src_offset value when creating the vertex element state object. We just need to align the src_offset values put in the driver_attribs[] array. See the piglit gl-1.5-vertex-buffer-offsets test. Reviewed-by: Marek Olšák --- src/gallium/auxiliary/util/u_vbuf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index b1b89bf1b6d..02ae0b840f0 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -781,10 +781,11 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers; ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers; - /* Align the formats to the size of DWORD if needed. */ + /* Align the formats and offsets to the size of DWORD if needed. 
*/ if (!mgr->caps.velem_src_offset_unaligned) { for (i = 0; i < count; i++) { ve->native_format_size[i] = align(ve->native_format_size[i], 4); + driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4); } } From 539cb2b76efd02f14798cad0a5462ee9ed27aa83 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 19 Jun 2015 12:08:24 -0400 Subject: [PATCH 725/834] mesa: move ARB_gs5 enums to core, EXT_polygon_offset_clamp to desktop When adding EXT_polygon_offset_clamp, I first made it core-only, and never moved the enum getter back to the GL/GL_CORE section. Similarly, ARB_gs5 is a core-only extension, so move its getters to the GL_CORE section. Signed-off-by: Ilia Mirkin Reviewed-by: Matt Turner --- src/mesa/main/get_hash_params.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 84c5aa31a68..74ff3ba6619 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -799,17 +799,14 @@ descriptor=[ [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader" ], [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader" ], -# GL_ARB_gpu_shader5 - [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5" ], - [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], - [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], - [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ], - # GL_ARB_framebuffer_no_attachments ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"], ["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"], 
["MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments"], ["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"], + +# GL_EXT_polygon_offset_clamp + [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ], ]}, # Enums restricted to OpenGL Core profile @@ -824,8 +821,11 @@ descriptor=[ [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array" ], [ "VIEWPORT_INDEX_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array" ], -# GL_EXT_polygon_offset_clamp - [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ], +# GL_ARB_gpu_shader5 + [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5" ], + [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], + [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], + [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ], ]} ] From 7c3da3592e8799059abca9cd7c92d61ebfd09f29 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 18 Jun 2015 18:45:47 -0700 Subject: [PATCH 726/834] i965/gen8: Use HALIGN_16 for single sample mcs buffers The original code meant to do this, but was only checking num_samples == 1 to figure out if a surface was fast clear capable. However, we can allocate single sample miptrees with num_samples == 0 (when it's an internally created buffer). This fixes a bunch of the piglit tests on gen8. Other gens should have been fine. Here is the order of events that allowed this to slip through: t0: I wrote halign patches and tested them. These alignment assertions are for gen8 fast clear surfaces, basically. 
t1: I pushed bogus perf patch which made fast clears never happen t2: Reworked halign patches based on Chad's feedback and introduced the bug this patch fixes. t2.5: I tested reworked patches, but assertion wasn't hit because of t1. t3. Matt fixed issue in t1 which made fast clears happen here: commit 22af95af8316f2888a3935cdf774ff0997b3dd42 Author: Matt Turner Date: Thu Jun 18 16:14:50 2015 -0700 i965: Add missing braces around if-statement. This logic should match that of the v1 of my halign patch series. Cc: Kenneth Graunke Cc: Matt Turner Reported-by: Kenneth Graunke Signed-off-by: Ben Widawsky Tested-by: Mark Janes --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 80c52f2feef..6aa969a4930 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -501,7 +501,7 @@ intel_miptree_create_layout(struct brw_context *brw, * 6 | ? | ? */ if (intel_miptree_is_fast_clear_capable(brw, mt)) { - if (brw->gen >= 9 || (brw->gen == 8 && num_samples == 1)) + if (brw->gen >= 9 || (brw->gen == 8 && num_samples <= 1)) layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; } else if (brw->gen >= 9 && num_samples > 1) { layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; From a1f84453a2f104a92f9efd353c629177e00e4b5e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 19 Jun 2015 16:45:44 -0600 Subject: [PATCH 727/834] glsl: fix formatting glitch in _mesa_print_ir() Print the closing ) before the newline. Trivial. 
--- src/glsl/ir_print_visitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp index c991658ab4b..4cbcad4ec61 100644 --- a/src/glsl/ir_print_visitor.cpp +++ b/src/glsl/ir_print_visitor.cpp @@ -72,7 +72,7 @@ _mesa_print_ir(FILE *f, exec_list *instructions, if (ir->ir_type != ir_type_function) fprintf(f, "\n"); } - fprintf(f, "\n)"); + fprintf(f, ")\n"); } void From aa3ec8bc465f8c82cb38e0ed067dbdd9122dbd44 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 19 Jun 2015 23:24:17 +0800 Subject: [PATCH 728/834] ilo: fix a -Wmaybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ilo_shader.c: In function ‘ilo_shader_select_kernel_sbe’: ilo_shader.c:1140:27: warning: ‘src_skip’ may be used uninitialized in this function [-Wmaybe-uninitialized] --- src/gallium/drivers/ilo/ilo_shader.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 93a26268a29..5f2b01017e2 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -1089,6 +1089,7 @@ ilo_shader_select_kernel_sbe(struct ilo_shader_state *shader, src_semantics = source->shader->out.semantic_names; src_indices = source->shader->out.semantic_indices; src_len = source->shader->out.count; + src_skip = 0; assert(src_len >= 2 && src_semantics[0] == TGSI_SEMANTIC_PSIZE && From 73f0d6d22db21f1fa553d8a26687edc5083e3c23 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 19 Jun 2015 23:29:32 +0800 Subject: [PATCH 729/834] ilo: fix a buffer overrun Add missing parentheses in SURFTYPE_NULL initialization. 
--- src/gallium/drivers/ilo/core/ilo_state_surface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.c b/src/gallium/drivers/ilo/core/ilo_state_surface.c index be7225b7bc4..5be9f8f6270 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_surface.c +++ b/src/gallium/drivers/ilo/core/ilo_state_surface.c @@ -89,7 +89,7 @@ surface_set_gen7_null_SURFACE_STATE(struct ilo_state_surface *surf, STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 13); surf->surface[0] = dw0; memset(&surf->surface[1], 0, sizeof(uint32_t) * - ((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 13 : 8) - 1); + (((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 13 : 8) - 1)); return true; } From 7b3432b62d25494b3662d12634e34d75e29ec865 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 18 Jun 2015 13:55:32 +0800 Subject: [PATCH 730/834] ilo: embed pipe_index_buffer in ilo_ib_state Make it obvious that we save a copy of pipe_index_buffer. --- src/gallium/drivers/ilo/ilo_draw.c | 20 +++++----- src/gallium/drivers/ilo/ilo_state.c | 60 ++++++++++++++--------------- src/gallium/drivers/ilo/ilo_state.h | 5 +-- 3 files changed, 40 insertions(+), 45 deletions(-) diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c index fc91fd312d2..e8e1a4cd14c 100644 --- a/src/gallium/drivers/ilo/ilo_draw.c +++ b/src/gallium/drivers/ilo/ilo_draw.c @@ -452,12 +452,12 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo, } u; /* we will draw with IB mapped */ - if (ib->buffer) { - u.ptr = intel_bo_map(ilo_buffer(ib->buffer)->bo, false); + if (ib->state.buffer) { + u.ptr = intel_bo_map(ilo_buffer(ib->state.buffer)->bo, false); if (u.ptr) - u.u8 += ib->offset; + u.u8 += ib->state.offset; } else { - u.ptr = ib->user_buffer; + u.ptr = ib->state.user_buffer; } if (!u.ptr) @@ -483,7 +483,7 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo, (pipe)->draw_vbo(pipe, &subinfo); \ } while (0) - switch (ib->index_size) { + switch (ib->state.index_size) { case 1: 
DRAW_VBO_WITH_SW_RESTART(&ilo->base, info, u.u8); break; @@ -500,8 +500,8 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo, #undef DRAW_VBO_WITH_SW_RESTART - if (ib->buffer) - intel_bo_unmap(ilo_buffer(ib->buffer)->bo); + if (ib->state.buffer) + intel_bo_unmap(ilo_buffer(ib->state.buffer)->bo); } static bool @@ -511,9 +511,9 @@ draw_vbo_need_sw_restart(const struct ilo_context *ilo, /* the restart index is fixed prior to GEN7.5 */ if (ilo_dev_gen(ilo->dev) < ILO_GEN(7.5)) { const unsigned cut_index = - (ilo->state_vector.ib.index_size == 1) ? 0xff : - (ilo->state_vector.ib.index_size == 2) ? 0xffff : - (ilo->state_vector.ib.index_size == 4) ? 0xffffffff : 0; + (ilo->state_vector.ib.state.index_size == 1) ? 0xff : + (ilo->state_vector.ib.state.index_size == 2) ? 0xffff : + (ilo->state_vector.ib.state.index_size == 4) ? 0xffffffff : 0; if (info->restart_index < cut_index) return true; diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 84fd7991c26..0b16f3b81b9 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -368,7 +368,8 @@ finalize_index_buffer(struct ilo_context *ilo) { struct ilo_state_vector *vec = &ilo->state_vector; const bool need_upload = (vec->draw->indexed && - (vec->ib.user_buffer || vec->ib.offset % vec->ib.index_size)); + (vec->ib.state.user_buffer || + vec->ib.state.offset % vec->ib.state.index_size)); struct pipe_resource *current_hw_res = NULL; if (!(vec->dirty & ILO_DIRTY_IB) && !need_upload) @@ -377,45 +378,47 @@ finalize_index_buffer(struct ilo_context *ilo) pipe_resource_reference(&current_hw_res, vec->ib.hw_resource); if (need_upload) { - const unsigned offset = vec->ib.index_size * vec->draw->start; - const unsigned size = vec->ib.index_size * vec->draw->count; + const unsigned offset = vec->ib.state.index_size * vec->draw->start; + const unsigned size = vec->ib.state.index_size * vec->draw->count; unsigned hw_offset; - if (vec->ib.user_buffer) { + if
(vec->ib.state.user_buffer) { u_upload_data(ilo->uploader, 0, size, - vec->ib.user_buffer + offset, &hw_offset, &vec->ib.hw_resource); - } - else { - u_upload_buffer(ilo->uploader, 0, vec->ib.offset + offset, size, - vec->ib.buffer, &hw_offset, &vec->ib.hw_resource); + vec->ib.state.user_buffer + offset, + &hw_offset, &vec->ib.hw_resource); + } else { + u_upload_buffer(ilo->uploader, 0, + vec->ib.state.offset + offset, size, vec->ib.state.buffer, + &hw_offset, &vec->ib.hw_resource); } /* the HW offset should be aligned */ - assert(hw_offset % vec->ib.index_size == 0); - vec->ib.draw_start_offset = hw_offset / vec->ib.index_size; + assert(hw_offset % vec->ib.state.index_size == 0); + vec->ib.draw_start_offset = hw_offset / vec->ib.state.index_size; /* * INDEX[vec->draw->start] in the original buffer is INDEX[0] in the HW * resource */ vec->ib.draw_start_offset -= vec->draw->start; - } - else { - pipe_resource_reference(&vec->ib.hw_resource, vec->ib.buffer); + } else { + pipe_resource_reference(&vec->ib.hw_resource, vec->ib.state.buffer); /* note that index size may be zero when the draw is not indexed */ - if (vec->draw->indexed) - vec->ib.draw_start_offset = vec->ib.offset / vec->ib.index_size; - else + if (vec->draw->indexed) { + vec->ib.draw_start_offset = + vec->ib.state.offset / vec->ib.state.index_size; + } else { vec->ib.draw_start_offset = 0; + } } /* treat the IB as clean if the HW states do not change */ if (vec->ib.hw_resource == current_hw_res && - vec->ib.hw_index_size == vec->ib.index_size) + vec->ib.hw_index_size == vec->ib.state.index_size) vec->dirty &= ~ILO_DIRTY_IB; else - vec->ib.hw_index_size = vec->ib.index_size; + vec->ib.hw_index_size = vec->ib.state.index_size; pipe_resource_reference(&current_hw_res, NULL); } @@ -1809,16 +1812,11 @@ ilo_set_index_buffer(struct pipe_context *pipe, struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector; if (state) { - pipe_resource_reference(&vec->ib.buffer, state->buffer); - vec->ib.user_buffer =
state->user_buffer; - vec->ib.offset = state->offset; - vec->ib.index_size = state->index_size; - } - else { - pipe_resource_reference(&vec->ib.buffer, NULL); - vec->ib.user_buffer = NULL; - vec->ib.offset = 0; - vec->ib.index_size = 0; + pipe_resource_reference(&vec->ib.state.buffer, state->buffer); + vec->ib.state = *state; + } else { + pipe_resource_reference(&vec->ib.state.buffer, NULL); + memset(&vec->ib.state, 0, sizeof(vec->ib.state)); } vec->dirty |= ILO_DIRTY_IB; @@ -2292,7 +2290,7 @@ ilo_state_vector_cleanup(struct ilo_state_vector *vec) pipe_resource_reference(&vec->vb.states[i].buffer, NULL); } - pipe_resource_reference(&vec->ib.buffer, NULL); + pipe_resource_reference(&vec->ib.state.buffer, NULL); pipe_resource_reference(&vec->ib.hw_resource, NULL); for (i = 0; i < vec->so.count; i++) @@ -2355,7 +2353,7 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec, } } - if (vec->ib.buffer == res) { + if (vec->ib.state.buffer == res) { states |= ILO_DIRTY_IB; /* diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 91c2a8d01dc..9ce7744948c 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -162,10 +162,7 @@ struct ilo_vb_state { }; struct ilo_ib_state { - struct pipe_resource *buffer; - const void *user_buffer; - unsigned offset; - unsigned index_size; + struct pipe_index_buffer state; /* these are not valid until the state is finalized */ struct pipe_resource *hw_resource; From e8d297b7a108fcf1cb688fe1db89e83b8f85e091 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Tue, 16 Jun 2015 23:11:06 +0800 Subject: [PATCH 731/834] ilo: add 3DSTATE_VF to ilo_state_vf 3DSTATE_VF specifies cut index enable and cut index. Cut index enable is specified in 3DSTATE_INDEX_BUFFER instead prior to Gen7.5. Both commands are added. 
--- .../drivers/ilo/core/ilo_builder_3d_top.h | 49 ++++---- src/gallium/drivers/ilo/core/ilo_state_vf.c | 109 +++++++++++++++++- src/gallium/drivers/ilo/core/ilo_state_vf.h | 13 ++- .../drivers/ilo/genhw/gen_render_3d.xml.h | 12 +- src/gallium/drivers/ilo/ilo_render.c | 4 - src/gallium/drivers/ilo/ilo_render_gen.h | 2 - src/gallium/drivers/ilo/ilo_render_gen6.c | 24 ++-- src/gallium/drivers/ilo/ilo_render_gen8.c | 11 +- src/gallium/drivers/ilo/ilo_state.c | 32 ++++- 9 files changed, 190 insertions(+), 66 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index bfd94344103..e4ee9cf3af4 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -190,8 +190,7 @@ gen7_3DSTATE_URB_GS(struct ilo_builder *builder, static inline void gen75_3DSTATE_VF(struct ilo_builder *builder, - bool enable_cut_index, - uint32_t cut_index) + const struct ilo_state_vf *vf) { const uint8_t cmd_len = 2; uint32_t *dw; @@ -200,11 +199,10 @@ gen75_3DSTATE_VF(struct ilo_builder *builder, ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2); - if (enable_cut_index) - dw[0] |= GEN75_VF_DW0_CUT_INDEX_ENABLE; - - dw[1] = cut_index; + /* see vf_params_set_gen75_3DSTATE_VF() */ + dw[0] = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2) | + vf->cut[0]; + dw[1] = vf->cut[1]; } static inline void @@ -444,13 +442,13 @@ gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, static inline void gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, - const struct ilo_ib_state *ib, - bool enable_cut_index) + const struct ilo_state_vf *vf, + const struct ilo_ib_state *ib) { const uint8_t cmd_len = 3; struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); uint32_t start_offset, end_offset; - int format; + enum gen_index_format format; uint32_t *dw; unsigned pos; @@ -459,23 +457,19 @@ gen6_3DSTATE_INDEX_BUFFER(struct 
ilo_builder *builder, if (!buf) return; - /* this is moved to the new 3DSTATE_VF */ - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) - assert(!enable_cut_index); - switch (ib->hw_index_size) { case 4: - format = GEN6_IB_DW0_FORMAT_DWORD; + format = GEN6_INDEX_DWORD; break; case 2: - format = GEN6_IB_DW0_FORMAT_WORD; + format = GEN6_INDEX_WORD; break; case 1: - format = GEN6_IB_DW0_FORMAT_BYTE; + format = GEN6_INDEX_BYTE; break; default: assert(!"unknown index size"); - format = GEN6_IB_DW0_FORMAT_BYTE; + format = GEN6_INDEX_BYTE; break; } @@ -494,9 +488,11 @@ gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2) | builder->mocs << GEN6_IB_DW0_MOCS__SHIFT | - format; - if (enable_cut_index) - dw[0] |= GEN6_IB_DW0_CUT_INDEX_ENABLE; + format << GEN6_IB_DW0_FORMAT__SHIFT; + + /* see vf_params_set_gen6_3dstate_index_buffer() */ + if (ilo_dev_gen(builder->dev) <= ILO_GEN(7)) + dw[0] |= vf->cut[0]; ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); @@ -504,6 +500,7 @@ gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, static inline void gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, + const struct ilo_state_vf *vf, const struct ilo_ib_state *ib) { const uint8_t cmd_len = 5; @@ -519,24 +516,24 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, switch (ib->hw_index_size) { case 4: - format = GEN8_IB_DW1_FORMAT_DWORD; + format = GEN6_INDEX_DWORD; break; case 2: - format = GEN8_IB_DW1_FORMAT_WORD; + format = GEN6_INDEX_WORD; break; case 1: - format = GEN8_IB_DW1_FORMAT_BYTE; + format = GEN6_INDEX_BYTE; break; default: assert(!"unknown index size"); - format = GEN8_IB_DW1_FORMAT_BYTE; + format = GEN6_INDEX_BYTE; break; } pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2); - dw[1] = format | + dw[1] = format << GEN8_IB_DW1_FORMAT__SHIFT | 
builder->mocs << GEN8_IB_DW1_MOCS__SHIFT; dw[4] = buf->bo_size; diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c index f9a462db254..571af3d5aea 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.c +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c @@ -339,6 +339,94 @@ vf_params_set_gen8_3DSTATE_VF_SGVS(struct ilo_state_vf *vf, return true; } +static uint32_t +get_gen6_fixed_cut_index(const struct ilo_dev *dev, + enum gen_index_format format) +{ + const uint32_t fixed = ~0u; + + ILO_DEV_ASSERT(dev, 6, 7); + + switch (format) { + case GEN6_INDEX_BYTE: return (uint8_t) fixed; + case GEN6_INDEX_WORD: return (uint16_t) fixed; + case GEN6_INDEX_DWORD: return (uint32_t) fixed; + default: + assert(!"unknown index format"); + return fixed; + } +} + +static bool +get_gen6_cut_index_supported(const struct ilo_dev *dev, + enum gen_3dprim_type topology) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * See the Sandy Bridge PRM, volume 2 part 1, page 80 and the Haswell PRM, + * volume 7, page 456. 
+ */ + switch (topology) { + case GEN6_3DPRIM_TRIFAN: + case GEN6_3DPRIM_QUADLIST: + case GEN6_3DPRIM_QUADSTRIP: + case GEN6_3DPRIM_POLYGON: + case GEN6_3DPRIM_LINELOOP: + return (ilo_dev_gen(dev) >= ILO_GEN(7.5)); + case GEN6_3DPRIM_RECTLIST: + case GEN6_3DPRIM_TRIFAN_NOSTIPPLE: + return false; + default: + return true; + } +} + +static bool +vf_params_set_gen6_3dstate_index_buffer(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf_params_info *params) +{ + uint32_t dw0 = 0; + + ILO_DEV_ASSERT(dev, 6, 7); + + /* cut index only, as in 3DSTATE_VF */ + if (params->cut_index_enable) { + assert(get_gen6_cut_index_supported(dev, params->cv_topology)); + assert(get_gen6_fixed_cut_index(dev, params->cv_index_format) == + params->cut_index); + + dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE; + } + + STATIC_ASSERT(ARRAY_SIZE(vf->cut) >= 1); + vf->cut[0] = dw0; + + return true; +} + +static bool +vf_params_set_gen75_3DSTATE_VF(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf_params_info *params) +{ + uint32_t dw0 = 0; + + ILO_DEV_ASSERT(dev, 7.5, 8); + + if (params->cut_index_enable) { + assert(get_gen6_cut_index_supported(dev, params->cv_topology)); + dw0 |= GEN75_VF_DW0_CUT_INDEX_ENABLE; + } + + STATIC_ASSERT(ARRAY_SIZE(vf->cut) >= 2); + vf->cut[0] = dw0; + vf->cut[1] = params->cut_index; + + return true; +} + bool ilo_state_vf_init(struct ilo_state_vf *vf, const struct ilo_dev *dev, @@ -354,6 +442,7 @@ ilo_state_vf_init(struct ilo_state_vf *vf, vf->user_ve = (uint32_t (*)[2]) info->data; ret &= vf_set_gen6_3DSTATE_VERTEX_ELEMENTS(vf, dev, info); + ret &= ilo_state_vf_set_params(vf, dev, &info->params); assert(ret); @@ -429,7 +518,7 @@ ilo_state_vf_set_params(struct ilo_state_vf *vf, if (params->last_element_edge_flag) { assert(vf->edge_flag_supported); if (ilo_dev_gen(dev) == ILO_GEN(6)) - assert(!params->cv_is_quad); + assert(params->cv_topology != GEN6_3DPRIM_QUADLIST); } if (vf->edge_flag_supported) { @@ -439,6 
+528,11 @@ ilo_state_vf_set_params(struct ilo_state_vf *vf, sizeof(vf->user_ve[vf->user_ve_count - 1])); } + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + ret &= vf_params_set_gen75_3DSTATE_VF(vf, dev, params); + else + ret &= vf_params_set_gen6_3dstate_index_buffer(vf, dev, params); + assert(ret); return ret; @@ -453,6 +547,11 @@ ilo_state_vf_full_delta(const struct ilo_state_vf *vf, if (ilo_dev_gen(dev) >= ILO_GEN(8)) delta->dirty |= ILO_STATE_VF_3DSTATE_VF_SGVS; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + delta->dirty |= ILO_STATE_VF_3DSTATE_VF; + else + delta->dirty |= ILO_STATE_VF_3DSTATE_INDEX_BUFFER; } void @@ -478,4 +577,12 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf, if (vf->sgvs[0] != old->sgvs[0]) delta->dirty |= ILO_STATE_VF_3DSTATE_VF_SGVS; } + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + if (memcmp(vf->cut, old->cut, sizeof(vf->cut))) + delta->dirty |= ILO_STATE_VF_3DSTATE_VF; + } else { + if (vf->cut[0] != old->cut[0]) + delta->dirty |= ILO_STATE_VF_3DSTATE_INDEX_BUFFER; + } } diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h index 7238e661d35..49a0eb3aa14 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.h +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h @@ -48,6 +48,8 @@ enum ilo_state_vf_dirty_bits { ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS = (1 << 0), ILO_STATE_VF_3DSTATE_VF_SGVS = (1 << 1), + ILO_STATE_VF_3DSTATE_VF = (1 << 2), + ILO_STATE_VF_3DSTATE_INDEX_BUFFER = (1 << 3), }; /** @@ -68,7 +70,7 @@ struct ilo_state_vf_element_info { * VF parameters. */ struct ilo_state_vf_params_info { - bool cv_is_quad; + enum gen_3dprim_type cv_topology; /* prepend an attribute of zeros */ bool prepend_zeros; @@ -78,8 +80,15 @@ struct ilo_state_vf_params_info { bool prepend_instanceid; bool last_element_edge_flag; + + enum gen_index_format cv_index_format; + bool cut_index_enable; + uint32_t cut_index; }; +/** + * Vertex fetch. 
+ */ struct ilo_state_vf_info { void *data; size_t data_size; @@ -101,6 +110,8 @@ struct ilo_state_vf { uint32_t last_user_ve[2][2]; bool edge_flag_supported; + + uint32_t cut[2]; }; struct ilo_state_vf_delta { diff --git a/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h index 1abfef987b5..60e3922e286 100644 --- a/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h +++ b/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h @@ -105,6 +105,12 @@ enum gen_state_alignment { GEN8_ALIGNMENT_SURFACE_STATE = 0x40, }; +enum gen_index_format { + GEN6_INDEX_BYTE = 0x0, + GEN6_INDEX_WORD = 0x1, + GEN6_INDEX_DWORD = 0x2, +}; + enum gen_vf_component { GEN6_VFCOMP_NOSTORE = 0x0, GEN6_VFCOMP_STORE_SRC = 0x1, @@ -366,9 +372,6 @@ enum gen_msrast_mode { #define GEN6_IB_DW0_CUT_INDEX_ENABLE (0x1 << 10) #define GEN6_IB_DW0_FORMAT__MASK 0x00000300 #define GEN6_IB_DW0_FORMAT__SHIFT 8 -#define GEN6_IB_DW0_FORMAT_BYTE (0x0 << 8) -#define GEN6_IB_DW0_FORMAT_WORD (0x1 << 8) -#define GEN6_IB_DW0_FORMAT_DWORD (0x2 << 8) @@ -376,9 +379,6 @@ enum gen_msrast_mode { #define GEN8_IB_DW1_FORMAT__MASK 0x00000300 #define GEN8_IB_DW1_FORMAT__SHIFT 8 -#define GEN8_IB_DW1_FORMAT_BYTE (0x0 << 8) -#define GEN8_IB_DW1_FORMAT_WORD (0x1 << 8) -#define GEN8_IB_DW1_FORMAT_DWORD (0x2 << 8) #define GEN8_IB_DW1_MOCS__MASK 0x0000007f #define GEN8_IB_DW1_MOCS__SHIFT 0 diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c index 910ed8c9608..21f75de11a0 100644 --- a/src/gallium/drivers/ilo/ilo_render.c +++ b/src/gallium/drivers/ilo/ilo_render.c @@ -334,7 +334,6 @@ draw_session_prepare(struct ilo_render *render, render->instruction_bo_changed = true; session->prim_changed = true; - session->primitive_restart_changed = true; ilo_state_urb_full_delta(&vec->urb, render->dev, &session->urb_delta); ilo_state_vf_full_delta(&vec->ve->vf, render->dev, &session->vf_delta); @@ -350,8 +349,6 @@ draw_session_prepare(struct ilo_render *render, } 
else { session->prim_changed = (render->state.reduced_prim != session->reduced_prim); - session->primitive_restart_changed = - (render->state.primitive_restart != vec->draw->primitive_restart); ilo_state_urb_get_delta(&vec->urb, render->dev, &render->state.urb, &session->urb_delta); @@ -390,7 +387,6 @@ draw_session_end(struct ilo_render *render, render->instruction_bo_changed = false; render->state.reduced_prim = session->reduced_prim; - render->state.primitive_restart = vec->draw->primitive_restart; render->state.urb = vec->urb; render->state.rs = vec->rasterizer->rs; diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index ae14e779e1f..6bbc0a8e3f1 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -82,7 +82,6 @@ struct ilo_render { */ uint32_t deferred_pipe_control_dw1; - bool primitive_restart; int reduced_prim; int so_max_vertices; @@ -143,7 +142,6 @@ struct ilo_render_draw_session { int reduced_prim; bool prim_changed; - bool primitive_restart_changed; struct ilo_state_urb_delta urb_delta; struct ilo_state_vf_delta vf_delta; diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 1414f12b439..89c87349b62 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -413,24 +413,18 @@ gen6_draw_vf(struct ilo_render *r, { if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) { /* 3DSTATE_INDEX_BUFFER */ - if (DIRTY(IB) || r->batch_bo_changed) { - gen6_3DSTATE_INDEX_BUFFER(r->builder, - &vec->ib, false); - } + if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) || + DIRTY(IB) || r->batch_bo_changed) + gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib); /* 3DSTATE_VF */ - if (session->primitive_restart_changed) { - gen75_3DSTATE_VF(r->builder, vec->draw->primitive_restart, - vec->draw->restart_index); - } - } - else { + if (session->vf_delta.dirty & 
ILO_STATE_VF_3DSTATE_VF) + gen75_3DSTATE_VF(r->builder, &vec->ve->vf); + } else { /* 3DSTATE_INDEX_BUFFER */ - if (DIRTY(IB) || session->primitive_restart_changed || - r->batch_bo_changed) { - gen6_3DSTATE_INDEX_BUFFER(r->builder, - &vec->ib, vec->draw->primitive_restart); - } + if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) || + DIRTY(IB) || r->batch_bo_changed) + gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib); } /* 3DSTATE_VERTEX_BUFFERS */ diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 495dbc3a283..7e0d2060698 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -202,14 +202,13 @@ gen8_draw_vf(struct ilo_render *r, int i; /* 3DSTATE_INDEX_BUFFER */ - if (DIRTY(IB) || r->batch_bo_changed) - gen8_3DSTATE_INDEX_BUFFER(r->builder, &vec->ib); + if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) || + DIRTY(IB) || r->batch_bo_changed) + gen8_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib); /* 3DSTATE_VF */ - if (session->primitive_restart_changed) { - gen75_3DSTATE_VF(r->builder, vec->draw->primitive_restart, - vec->draw->restart_index); - } + if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF) + gen75_3DSTATE_VF(r->builder, &vec->ve->vf); /* 3DSTATE_VERTEX_BUFFERS */ if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) { diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 0b16f3b81b9..a01e9da69ec 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -39,6 +39,19 @@ #include "ilo_shader.h" #include "ilo_state.h" +static enum gen_index_format +ilo_translate_index_size(unsigned index_size) +{ + switch (index_size) { + case 1: return GEN6_INDEX_BYTE; + case 2: return GEN6_INDEX_WORD; + case 4: return GEN6_INDEX_DWORD; + default: + assert(!"unknown index size"); + return GEN6_INDEX_BYTE; + } +} + static enum gen_mip_filter 
ilo_translate_mip_filter(unsigned filter) { @@ -375,6 +388,7 @@ finalize_index_buffer(struct ilo_context *ilo) if (!(vec->dirty & ILO_DIRTY_IB) && !need_upload) return; + /* make sure vec->ib.hw_resource changes when reallocated */ pipe_resource_reference(&current_hw_res, vec->ib.hw_resource); if (need_upload) { @@ -429,8 +443,8 @@ finalize_vertex_elements(struct ilo_context *ilo) const struct ilo_dev *dev = ilo->dev; struct ilo_state_vector *vec = &ilo->state_vector; struct ilo_ve_state *ve = vec->ve; - const bool is_quad = (vec->draw->mode == PIPE_PRIM_QUADS || - vec->draw->mode == PIPE_PRIM_QUAD_STRIP); + const enum gen_3dprim_type topology = + gen6_3d_translate_pipe_prim(vec->draw->mode); const bool last_element_edge_flag = (vec->vs && ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG)); const bool prepend_vertexid = (vec->vs && @@ -438,16 +452,24 @@ finalize_vertex_elements(struct ilo_context *ilo) const bool prepend_instanceid = (vec->vs && ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_INSTANCEID)); + const enum gen_index_format index_format = (vec->draw->indexed) ?
+ ilo_translate_index_size(vec->ib.state.index_size) : GEN6_INDEX_DWORD; /* check for non-orthogonal states */ - if (ve->vf_params.cv_is_quad != is_quad || + if (ve->vf_params.cv_topology != topology || ve->vf_params.prepend_vertexid != prepend_vertexid || ve->vf_params.prepend_instanceid != prepend_instanceid || - ve->vf_params.last_element_edge_flag != last_element_edge_flag) { - ve->vf_params.cv_is_quad = is_quad; + ve->vf_params.last_element_edge_flag != last_element_edge_flag || + ve->vf_params.cv_index_format != index_format || + ve->vf_params.cut_index_enable != vec->draw->primitive_restart || + ve->vf_params.cut_index != vec->draw->restart_index) { + ve->vf_params.cv_topology = topology; ve->vf_params.prepend_vertexid = prepend_vertexid; ve->vf_params.prepend_instanceid = prepend_instanceid; ve->vf_params.last_element_edge_flag = last_element_edge_flag; + ve->vf_params.cv_index_format = index_format; + ve->vf_params.cut_index_enable = vec->draw->primitive_restart; + ve->vf_params.cut_index = vec->draw->restart_index; ilo_state_vf_set_params(&ve->vf, dev, &ve->vf_params); From 4555211028394673f8ad68f3de9c12e9a1f93160 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 18 Jun 2015 14:26:29 +0800 Subject: [PATCH 732/834] ilo: add 3DSTATE_VF_INSTANCING to ilo_state_vf 3DSTATE_VF_INSTANCING specifies instancing enable and step rate. They are specified along with 3DSTATE_VERTEX_BUFFERS instead prior to Gen8. Both commands are added. 
--- .../drivers/ilo/core/ilo_builder_3d_top.h | 50 ++++---- src/gallium/drivers/ilo/core/ilo_state_vf.c | 117 +++++++++++++++--- src/gallium/drivers/ilo/core/ilo_state_vf.h | 28 +++-- .../drivers/ilo/genhw/gen_render_3d.xml.h | 4 +- src/gallium/drivers/ilo/ilo_blitter.h | 2 +- src/gallium/drivers/ilo/ilo_render_gen6.c | 7 +- src/gallium/drivers/ilo/ilo_render_gen8.c | 18 +-- src/gallium/drivers/ilo/ilo_state.c | 8 +- src/gallium/drivers/ilo/ilo_state.h | 3 +- 9 files changed, 169 insertions(+), 68 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index e4ee9cf3af4..782746591d9 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -264,7 +264,8 @@ gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder, static inline void gen8_3DSTATE_VF_INSTANCING(struct ilo_builder *builder, - int vb_index, uint32_t step_rate) + const struct ilo_state_vf *vf, + uint32_t attr) { const uint8_t cmd_len = 3; uint32_t *dw; @@ -274,10 +275,15 @@ gen8_3DSTATE_VF_INSTANCING(struct ilo_builder *builder, ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_INSTANCING) | (cmd_len - 2); - dw[1] = vb_index; - if (step_rate) - dw[1] |= GEN8_INSTANCING_DW1_ENABLE; - dw[2] = step_rate; + dw[1] = attr << GEN8_INSTANCING_DW1_VE_INDEX__SHIFT; + dw[2] = 0; + /* see vf_set_gen8_3DSTATE_VF_INSTANCING() */ + if (attr >= vf->internal_ve_count) { + attr -= vf->internal_ve_count; + + dw[1] |= vf->user_instancing[attr][0]; + dw[2] |= vf->user_instancing[attr][1]; + } } static inline void @@ -298,9 +304,9 @@ gen8_3DSTATE_VF_SGVS(struct ilo_builder *builder, static inline void gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, + const struct ilo_state_vf *vf, const struct ilo_vb_state *vb, const unsigned *vb_mapping, - const unsigned *instance_divisors, unsigned vb_count) { uint8_t cmd_len; @@ -327,9 +333,9 @@ 
gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, pos++; for (hw_idx = 0; hw_idx < vb_count; hw_idx++) { - const unsigned instance_divisor = instance_divisors[hw_idx]; const unsigned pipe_idx = vb_mapping[hw_idx]; const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx]; + const int8_t elem = vf->vb_to_first_elem[hw_idx]; dw[0] = hw_idx << GEN6_VB_DW0_INDEX__SHIFT; @@ -341,19 +347,19 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) dw[0] |= GEN7_VB_DW0_ADDR_MODIFIED; - if (instance_divisor) - dw[0] |= GEN6_VB_DW0_ACCESS_INSTANCEDATA; - else - dw[0] |= GEN6_VB_DW0_ACCESS_VERTEXDATA; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; - /* use null vb if there is no buffer or the stride is out of range */ - if (!cso->buffer || cso->stride > 2048) { + /* see vf_set_gen6_vertex_buffer_state() */ + if (ilo_dev_gen(builder->dev) < ILO_GEN(8) && elem >= 0) { + dw[0] |= vf->user_instancing[elem][0]; + dw[3] |= vf->user_instancing[elem][1]; + } + + /* use null vb if there is no VE/buffer or the stride is out of range */ + if (elem < 0 || !cso->buffer || cso->stride > 2048) { dw[0] |= GEN6_VB_DW0_IS_NULL; - dw[1] = 0; - dw[2] = 0; - dw[3] = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 
- 0 : instance_divisor; - continue; } @@ -371,8 +377,6 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, const uint32_t start_offset = cso->buffer_offset; const uint32_t end_offset = buf->bo_size - 1; - dw[3] = instance_divisor; - ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); } @@ -431,12 +435,16 @@ gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2); dw++; - /* see vf_set_gen6_3DSTATE_VERTEX_ELEMENTS() */ + /* + * see vf_params_set_gen6_internal_ve() and + * vf_set_gen6_3DSTATE_VERTEX_ELEMENTS() + */ if (vf->internal_ve_count) { memcpy(dw, vf->internal_ve, sizeof(vf->internal_ve[0]) * vf->internal_ve_count); dw += 2 * vf->internal_ve_count; } + memcpy(dw, vf->user_ve, sizeof(vf->user_ve[0]) * vf->user_ve_count); } diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c index 571af3d5aea..4126560aee9 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.c +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c @@ -221,6 +221,66 @@ vf_set_gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_state_vf *vf, return true; } +static bool +vf_set_gen6_vertex_buffer_state(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf_info *info) +{ + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + memset(vf->vb_to_first_elem, -1, sizeof(vf->vb_to_first_elem)); + + for (i = 0; i < info->element_count; i++) { + const struct ilo_state_vf_element_info *elem = &info->elements[i]; + + STATIC_ASSERT(ARRAY_SIZE(vf->user_instancing[i]) >= 2); + /* instancing enable only */ + vf->user_instancing[i][0] = (elem->instancing_enable) ? + GEN6_VB_DW0_ACCESS_INSTANCEDATA : + GEN6_VB_DW0_ACCESS_VERTEXDATA; + vf->user_instancing[i][1] = elem->instancing_step_rate; + + /* + * Instancing is per VB, not per VE, before Gen8. Set up a VB-to-VE + * mapping as well. 
+ */ + if (vf->vb_to_first_elem[elem->buffer] < 0) { + vf->vb_to_first_elem[elem->buffer] = i; + } else { + const struct ilo_state_vf_element_info *first = + &info->elements[vf->vb_to_first_elem[elem->buffer]]; + + assert(elem->instancing_enable == first->instancing_enable && + elem->instancing_step_rate == first->instancing_step_rate); + } + } + + return true; +} + +static bool +vf_set_gen8_3DSTATE_VF_INSTANCING(struct ilo_state_vf *vf, + const struct ilo_dev *dev, + const struct ilo_state_vf_info *info) +{ + uint8_t i; + + ILO_DEV_ASSERT(dev, 8, 8); + + for (i = 0; i < info->element_count; i++) { + const struct ilo_state_vf_element_info *elem = &info->elements[i]; + + STATIC_ASSERT(ARRAY_SIZE(vf->user_instancing[i]) >= 2); + vf->user_instancing[i][0] = (elem->instancing_enable) ? + GEN8_INSTANCING_DW1_ENABLE : 0; + vf->user_instancing[i][1] = elem->instancing_step_rate; + } + + return true; +} + static uint32_t get_gen6_component_zeros(const struct ilo_dev *dev) { @@ -254,7 +314,9 @@ vf_params_set_gen6_internal_ve(struct ilo_state_vf *vf, { const bool prepend_ids = (params->prepend_vertexid || params->prepend_instanceid); - uint8_t internal_ve_count = 0; + uint8_t internal_ve_count = 0, i; + uint32_t dw1[2]; + ILO_DEV_ASSERT(dev, 6, 8); @@ -279,29 +341,23 @@ vf_params_set_gen6_internal_ve(struct ilo_state_vf *vf, * * - [DevILK+] Element[0] must be valid." 
*/ - if (params->prepend_zeros || (!user_ve_count && !prepend_ids)) { - STATIC_ASSERT(ARRAY_SIZE(vf->internal_ve[internal_ve_count]) >= 2); - vf->internal_ve[internal_ve_count][0] = GEN6_VE_DW0_VALID; - vf->internal_ve[internal_ve_count][1] = get_gen6_component_zeros(dev); - internal_ve_count++; - } + if (params->prepend_zeros || (!user_ve_count && !prepend_ids)) + dw1[internal_ve_count++] = get_gen6_component_zeros(dev); if (prepend_ids) { - uint32_t dw1; - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { /* placeholder for 3DSTATE_VF_SGVS */ - dw1 = get_gen6_component_zeros(dev); + dw1[internal_ve_count++] = get_gen6_component_zeros(dev); } else { - dw1 = get_gen6_component_ids(dev, - params->prepend_vertexid, - params->prepend_instanceid); + dw1[internal_ve_count++] = get_gen6_component_ids(dev, + params->prepend_vertexid, params->prepend_instanceid); } + } - STATIC_ASSERT(ARRAY_SIZE(vf->internal_ve[internal_ve_count]) >= 2); - vf->internal_ve[internal_ve_count][0] = GEN6_VE_DW0_VALID; - vf->internal_ve[internal_ve_count][1] = dw1; - internal_ve_count++; + for (i = 0; i < internal_ve_count; i++) { + STATIC_ASSERT(ARRAY_SIZE(vf->internal_ve[i]) >= 2); + vf->internal_ve[i][0] = GEN6_VE_DW0_VALID; + vf->internal_ve[i][1] = dw1[i]; } vf->internal_ve_count = internal_ve_count; @@ -440,9 +496,16 @@ ilo_state_vf_init(struct ilo_state_vf *vf, assert(ilo_state_vf_data_size(dev, info->element_count) <= info->data_size); vf->user_ve = (uint32_t (*)[2]) info->data; + vf->user_instancing = + (uint32_t (*)[2]) (vf->user_ve + info->element_count); ret &= vf_set_gen6_3DSTATE_VERTEX_ELEMENTS(vf, dev, info); + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + ret &= vf_set_gen8_3DSTATE_VF_INSTANCING(vf, dev, info); + else + ret &= vf_set_gen6_vertex_buffer_state(vf, dev, info); + ret &= ilo_state_vf_set_params(vf, dev, &info->params); assert(ret); @@ -545,8 +608,12 @@ ilo_state_vf_full_delta(const struct ilo_state_vf *vf, { delta->dirty = ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS; - if (ilo_dev_gen(dev) >= 
ILO_GEN(8)) - delta->dirty |= ILO_STATE_VF_3DSTATE_VF_SGVS; + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + delta->dirty |= ILO_STATE_VF_3DSTATE_VF_SGVS | + ILO_STATE_VF_3DSTATE_VF_INSTANCING; + } else { + delta->dirty |= ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS; + } if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) delta->dirty |= ILO_STATE_VF_3DSTATE_VF; @@ -561,7 +628,8 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf, struct ilo_state_vf_delta *delta) { /* no shallow copying */ - assert(vf->user_ve != old->user_ve); + assert(vf->user_ve != old->user_ve && + vf->user_instancing != old->user_instancing); delta->dirty = 0; @@ -573,6 +641,15 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf, sizeof(vf->user_ve[0]) * vf->user_ve_count)) delta->dirty |= ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS; + if (vf->user_ve_count != old->user_ve_count || + memcmp(vf->user_instancing, old->user_instancing, + sizeof(vf->user_instancing[0]) * vf->user_ve_count)) { + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + delta->dirty |= ILO_STATE_VF_3DSTATE_VF_INSTANCING; + else + delta->dirty |= ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS; + } + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { if (vf->sgvs[0] != old->sgvs[0]) delta->dirty |= ILO_STATE_VF_3DSTATE_VF_SGVS; diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h index 49a0eb3aa14..c51da10270f 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.h +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h @@ -48,8 +48,10 @@ enum ilo_state_vf_dirty_bits { ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS = (1 << 0), ILO_STATE_VF_3DSTATE_VF_SGVS = (1 << 1), - ILO_STATE_VF_3DSTATE_VF = (1 << 2), - ILO_STATE_VF_3DSTATE_INDEX_BUFFER = (1 << 3), + ILO_STATE_VF_3DSTATE_VF_INSTANCING = (1 << 2), + ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS = (1 << 3), + ILO_STATE_VF_3DSTATE_VF = (1 << 4), + ILO_STATE_VF_3DSTATE_INDEX_BUFFER = (1 << 5), }; /** @@ -64,6 +66,10 @@ struct ilo_state_vf_element_info { uint8_t component_count; bool is_integer; bool is_double; + + 
/* must be the same for those share the same buffer before Gen8 */ + bool instancing_enable; + uint32_t instancing_step_rate; }; /** @@ -100,16 +106,19 @@ struct ilo_state_vf_info { }; struct ilo_state_vf { - /* two VEs are reserved for internal use */ - uint32_t internal_ve[2][2]; uint32_t (*user_ve)[2]; - uint8_t internal_ve_count; + uint32_t (*user_instancing)[2]; + int8_t vb_to_first_elem[ILO_STATE_VF_MAX_BUFFER_COUNT]; uint8_t user_ve_count; - uint32_t sgvs[1]; - - uint32_t last_user_ve[2][2]; bool edge_flag_supported; + uint32_t last_user_ve[2][2]; + + /* two VEs are reserved for internal use */ + uint32_t internal_ve[2][2]; + uint8_t internal_ve_count; + + uint32_t sgvs[1]; uint32_t cut[2]; }; @@ -122,7 +131,8 @@ static inline size_t ilo_state_vf_data_size(const struct ilo_dev *dev, uint8_t element_count) { const struct ilo_state_vf *vf = NULL; - return sizeof(vf->user_ve[0]) * element_count; + return (sizeof(vf->user_ve[0]) + + sizeof(vf->user_instancing[0])) * element_count; } bool diff --git a/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h index 60e3922e286..52173fe5d07 100644 --- a/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h +++ b/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h @@ -394,8 +394,8 @@ enum gen_msrast_mode { #define GEN8_INSTANCING_DW1_ENABLE (0x1 << 8) -#define GEN8_INSTANCING_DW1_VB_INDEX__MASK 0x0000003f -#define GEN8_INSTANCING_DW1_VB_INDEX__SHIFT 0 +#define GEN8_INSTANCING_DW1_VE_INDEX__MASK 0x0000003f +#define GEN8_INSTANCING_DW1_VE_INDEX__SHIFT 0 #define GEN8_3DSTATE_VF_SGVS__SIZE 2 diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index 08690f30378..6af6046e1a9 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -60,7 +60,7 @@ struct ilo_blitter { float vertices[3][2]; struct pipe_draw_info draw; - uint32_t vf_data[2]; + uint32_t vf_data[4]; struct ilo_state_vf vf; struct ilo_state_vs 
vs; diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 89c87349b62..f997274397a 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -428,9 +428,10 @@ gen6_draw_vf(struct ilo_render *r, } /* 3DSTATE_VERTEX_BUFFERS */ - if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) { - gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->vb, vec->ve->vb_mapping, - vec->ve->instance_divisors, vec->ve->vb_count); + if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS) || + DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) { + gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->ve->vf, &vec->vb, + vec->ve->vb_mapping, vec->ve->vb_count); } /* 3DSTATE_VERTEX_ELEMENTS */ diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 7e0d2060698..3b8589ce8f1 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -199,8 +199,6 @@ gen8_draw_vf(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { - int i; - /* 3DSTATE_INDEX_BUFFER */ if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) || DIRTY(IB) || r->batch_bo_changed) @@ -211,9 +209,10 @@ gen8_draw_vf(struct ilo_render *r, gen75_3DSTATE_VF(r->builder, &vec->ve->vf); /* 3DSTATE_VERTEX_BUFFERS */ - if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) { - gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->vb, vec->ve->vb_mapping, - vec->ve->instance_divisors, vec->ve->vb_count); + if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS) || + DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) { + gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->ve->vf, &vec->vb, + vec->ve->vb_mapping, vec->ve->vb_count); } /* 3DSTATE_VERTEX_ELEMENTS */ @@ -223,9 +222,12 @@ gen8_draw_vf(struct ilo_render *r, gen8_3DSTATE_VF_TOPOLOGY(r->builder, gen6_3d_translate_pipe_prim(vec->draw->mode)); - for (i = 
0; i < vec->ve->vb_count; i++) { - gen8_3DSTATE_VF_INSTANCING(r->builder, i, - vec->ve->instance_divisors[i]); + if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF_INSTANCING) { + const uint8_t attr_count = ilo_state_vf_get_attr_count(&vec->ve->vf); + uint8_t i; + + for (i = 0; i < attr_count; i++) + gen8_3DSTATE_VF_INSTANCING(r->builder, &vec->ve->vf, i); } if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF_SGVS) diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index a01e9da69ec..d6144e4035f 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -1329,6 +1329,7 @@ ilo_create_vertex_elements_state(struct pipe_context *pipe, { const struct ilo_dev *dev = ilo_context(pipe)->dev; struct ilo_state_vf_element_info vf_elements[PIPE_MAX_ATTRIBS]; + unsigned instance_divisors[PIPE_MAX_ATTRIBS]; struct ilo_state_vf_info vf_info; struct ilo_ve_state *ve; unsigned i; @@ -1347,7 +1348,7 @@ ilo_create_vertex_elements_state(struct pipe_context *pipe, */ for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { if (ve->vb_mapping[hw_idx] == elem->vertex_buffer_index && - ve->instance_divisors[hw_idx] == elem->instance_divisor) + instance_divisors[hw_idx] == elem->instance_divisor) break; } @@ -1356,7 +1357,7 @@ ilo_create_vertex_elements_state(struct pipe_context *pipe, hw_idx = ve->vb_count++; ve->vb_mapping[hw_idx] = elem->vertex_buffer_index; - ve->instance_divisors[hw_idx] = elem->instance_divisor; + instance_divisors[hw_idx] = elem->instance_divisor; } attr->buffer = hw_idx; @@ -1367,6 +1368,9 @@ ilo_create_vertex_elements_state(struct pipe_context *pipe, attr->is_integer = util_format_is_pure_integer(elem->src_format); attr->is_double = (util_format_is_float(elem->src_format) && attr->format_size == attr->component_count * 8); + + attr->instancing_enable = (elem->instance_divisor != 0); + attr->instancing_step_rate = elem->instance_divisor; } memset(&vf_info, 0, sizeof(vf_info)); diff --git 
a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 9ce7744948c..66c671a01e5 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -147,11 +147,10 @@ struct ilo_shader_state; struct ilo_ve_state { unsigned vb_mapping[PIPE_MAX_ATTRIBS]; - unsigned instance_divisors[PIPE_MAX_ATTRIBS]; unsigned vb_count; /* these are not valid until the state is finalized */ - uint32_t vf_data[PIPE_MAX_ATTRIBS][2]; + uint32_t vf_data[PIPE_MAX_ATTRIBS][4]; struct ilo_state_vf_params_info vf_params; struct ilo_state_vf vf; }; From da4878cb807f46e6053731a177c3c75497aaf4fb Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 19 Jun 2015 15:06:50 +0800 Subject: [PATCH 733/834] ilo: add ilo_state_vertex_buffer Being a parameter-like state, we may want to get rid of ilo_state_vertex_buffer_info or ilo_state_vertex_buffer eventually. But we want them now as they are how we do cross-validation right now. --- .../drivers/ilo/core/ilo_builder_3d_top.h | 56 ++++----- src/gallium/drivers/ilo/core/ilo_state_vf.c | 113 ++++++++++++++++-- src/gallium/drivers/ilo/core/ilo_state_vf.h | 29 ++++- src/gallium/drivers/ilo/ilo_render_gen6.c | 4 +- src/gallium/drivers/ilo/ilo_render_gen8.c | 4 +- src/gallium/drivers/ilo/ilo_state.c | 36 +++++- src/gallium/drivers/ilo/ilo_state.h | 1 + 7 files changed, 189 insertions(+), 54 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 782746591d9..bb20c7fd87a 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -305,13 +305,12 @@ gen8_3DSTATE_VF_SGVS(struct ilo_builder *builder, static inline void gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, const struct ilo_state_vf *vf, - const struct ilo_vb_state *vb, - const unsigned *vb_mapping, + const struct ilo_state_vertex_buffer *vb, unsigned vb_count) { uint8_t cmd_len; uint32_t *dw; - unsigned 
pos, hw_idx; + unsigned pos, i; ILO_DEV_ASSERT(builder->dev, 6, 8); @@ -332,53 +331,40 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, dw++; pos++; - for (hw_idx = 0; hw_idx < vb_count; hw_idx++) { - const unsigned pipe_idx = vb_mapping[hw_idx]; - const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx]; - const int8_t elem = vf->vb_to_first_elem[hw_idx]; + for (i = 0; i < vb_count; i++) { + const struct ilo_state_vertex_buffer *b = &vb[i]; - dw[0] = hw_idx << GEN6_VB_DW0_INDEX__SHIFT; + /* see vertex_buffer_set_gen8_vertex_buffer_state() */ + dw[0] = b->vb[0] | + i << GEN6_VB_DW0_INDEX__SHIFT; if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) dw[0] |= builder->mocs << GEN8_VB_DW0_MOCS__SHIFT; else dw[0] |= builder->mocs << GEN6_VB_DW0_MOCS__SHIFT; - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) - dw[0] |= GEN7_VB_DW0_ADDR_MODIFIED; - dw[1] = 0; dw[2] = 0; dw[3] = 0; - /* see vf_set_gen6_vertex_buffer_state() */ - if (ilo_dev_gen(builder->dev) < ILO_GEN(8) && elem >= 0) { - dw[0] |= vf->user_instancing[elem][0]; - dw[3] |= vf->user_instancing[elem][1]; - } - - /* use null vb if there is no VE/buffer or the stride is out of range */ - if (elem < 0 || !cso->buffer || cso->stride > 2048) { - dw[0] |= GEN6_VB_DW0_IS_NULL; - continue; - } - - dw[0] |= cso->stride << GEN6_VB_DW0_PITCH__SHIFT; - if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { - const struct ilo_buffer *buf = ilo_buffer(cso->buffer); - const uint32_t start_offset = cso->buffer_offset; + if (b->need_bo) + ilo_builder_batch_reloc64(builder, pos + 1, b->bo, b->vb[1], 0); - ilo_builder_batch_reloc64(builder, pos + 1, - buf->bo, start_offset, 0); - dw[3] = buf->bo_size; + dw[3] |= b->vb[2]; } else { - const struct ilo_buffer *buf = ilo_buffer(cso->buffer); - const uint32_t start_offset = cso->buffer_offset; - const uint32_t end_offset = buf->bo_size - 1; + const int8_t elem = vf->vb_to_first_elem[i]; - ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); - 
ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); + /* see vf_set_gen6_vertex_buffer_state() */ + if (elem >= 0) { + dw[0] |= vf->user_instancing[elem][0]; + dw[3] |= vf->user_instancing[elem][1]; + } + + if (b->need_bo) { + ilo_builder_batch_reloc(builder, pos + 1, b->bo, b->vb[1], 0); + ilo_builder_batch_reloc(builder, pos + 2, b->bo, b->vb[2], 0); + } } dw += 4; diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c index 4126560aee9..92e0380e5c3 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.c +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c @@ -26,6 +26,7 @@ */ #include "ilo_debug.h" +#include "ilo_buffer.h" #include "ilo_state_vf.h" static bool @@ -66,18 +67,6 @@ vf_validate_gen6_elements(const struct ilo_dev *dev, assert(elem->buffer < ILO_STATE_VF_MAX_BUFFER_COUNT); assert(elem->vertex_offset < max_vertex_offset); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 86: - * - * "64-bit floating point values must be 64-bit aligned in memory, - * or UNPREDICTABLE data will be fetched. When accessing an element - * containing 64-bit floating point values, the Buffer Starting - * Address and Source Element Offset values must add to a 64-bit - * aligned address, and BufferPitch must be a multiple of 64-bits." 
- */ - if (elem->is_double) - assert(elem->vertex_offset % 8 == 0); } return true; @@ -483,6 +472,89 @@ vf_params_set_gen75_3DSTATE_VF(struct ilo_state_vf *vf, return true; } +static bool +vertex_buffer_validate_gen6(const struct ilo_dev *dev, + const struct ilo_state_vertex_buffer_info *info) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + if (info->buf) + assert(info->offset < info->buf->bo_size && info->size); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 86: + * + * "(Buffer Pitch) + * Range [DevCTG+]: [0,2048] Bytes" + */ + assert(info->stride <= 2048); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 86: + * + * "64-bit floating point values must be 64-bit aligned in memory, or + * UNPREDICTABLE data will be fetched. When accessing an element + * containing 64-bit floating point values, the Buffer Starting + * Address and Source Element Offset values must add to a 64-bit + * aligned address, and BufferPitch must be a multiple of 64-bits." + */ + if (info->cv_has_double) { + assert(info->stride % 8 == 0); + assert((info->offset + info->cv_double_vertex_offset_mod_8) % 8 == 0); + } + + return true; +} + +static uint32_t +vertex_buffer_get_gen6_size(const struct ilo_dev *dev, + const struct ilo_state_vertex_buffer_info *info) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + if (!info->buf) + return 0; + + return (info->offset + info->size <= info->buf->bo_size) ? 
info->size : + info->buf->bo_size - info->offset; +} + +static bool +vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb, + const struct ilo_dev *dev, + const struct ilo_state_vertex_buffer_info *info) +{ + const uint32_t size = vertex_buffer_get_gen6_size(dev, info); + uint32_t dw0; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!vertex_buffer_validate_gen6(dev, info)) + return false; + + dw0 = info->stride << GEN6_VB_DW0_PITCH__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + dw0 |= GEN7_VB_DW0_ADDR_MODIFIED; + if (!info->buf) + dw0 |= GEN6_VB_DW0_IS_NULL; + + STATIC_ASSERT(ARRAY_SIZE(vb->vb) >= 3); + vb->vb[0] = dw0; + vb->vb[1] = info->offset; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + vb->vb[2] = size; + } else { + /* address of the last valid byte */ + vb->vb[2] = (size) ? info->offset + size - 1 : 0; + } + + vb->need_bo = (info->buf != NULL); + + return true; +} + bool ilo_state_vf_init(struct ilo_state_vf *vf, const struct ilo_dev *dev, @@ -663,3 +735,20 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf, delta->dirty |= ILO_STATE_VF_3DSTATE_INDEX_BUFFER; } } + +/** + * No need to initialize first. 
+ */ +bool +ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb, + const struct ilo_dev *dev, + const struct ilo_state_vertex_buffer_info *info) +{ + bool ret = true; + + ret &= vertex_buffer_set_gen8_vertex_buffer_state(vb, dev, info); + + assert(ret); + + return ret; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h index c51da10270f..488f92f9b40 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.h +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h @@ -65,7 +65,6 @@ struct ilo_state_vf_element_info { uint8_t format_size; uint8_t component_count; bool is_integer; - bool is_double; /* must be the same for those share the same buffer before Gen8 */ bool instancing_enable; @@ -127,6 +126,29 @@ struct ilo_state_vf_delta { uint32_t dirty; }; +struct ilo_buffer; + +struct ilo_state_vertex_buffer_info { + const struct ilo_buffer *buf; + uint32_t offset; + uint32_t size; + + uint16_t stride; + + /* doubles must be at 64-bit aligned addresses */ + bool cv_has_double; + uint8_t cv_double_vertex_offset_mod_8; +}; + +struct ilo_state_vertex_buffer { + uint32_t vb[3]; + + bool need_bo; + + /* managed by users */ + struct intel_bo *bo; +}; + static inline size_t ilo_state_vf_data_size(const struct ilo_dev *dev, uint8_t element_count) { @@ -172,4 +194,9 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf, const struct ilo_state_vf *old, struct ilo_state_vf_delta *delta); +bool +ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb, + const struct ilo_dev *dev, + const struct ilo_state_vertex_buffer_info *info); + #endif /* ILO_STATE_VF_H */ diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index f997274397a..8415b136002 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -430,8 +430,8 @@ gen6_draw_vf(struct ilo_render *r, /* 3DSTATE_VERTEX_BUFFERS */ if ((session->vf_delta.dirty & 
ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS) || DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) { - gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->ve->vf, &vec->vb, - vec->ve->vb_mapping, vec->ve->vb_count); + gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->ve->vf, + vec->vb.vb, vec->ve->vb_count); } /* 3DSTATE_VERTEX_ELEMENTS */ diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 3b8589ce8f1..4c1c08bbd25 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -211,8 +211,8 @@ gen8_draw_vf(struct ilo_render *r, /* 3DSTATE_VERTEX_BUFFERS */ if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS) || DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) { - gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->ve->vf, &vec->vb, - vec->ve->vb_mapping, vec->ve->vb_count); + gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->ve->vf, + vec->vb.vb, vec->ve->vb_count); } /* 3DSTATE_VERTEX_ELEMENTS */ diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index d6144e4035f..e24f8fa316f 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -477,6 +477,39 @@ finalize_vertex_elements(struct ilo_context *ilo) } } +static void +finalize_vertex_buffers(struct ilo_context *ilo) +{ + const struct ilo_dev *dev = ilo->dev; + struct ilo_state_vector *vec = &ilo->state_vector; + struct ilo_state_vertex_buffer_info info; + unsigned i; + + if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VB))) + return; + + memset(&info, 0, sizeof(info)); + + for (i = 0; i < vec->ve->vb_count; i++) { + const unsigned pipe_idx = vec->ve->vb_mapping[i]; + const struct pipe_vertex_buffer *cso = &vec->vb.states[pipe_idx]; + + if (cso->buffer) { + info.buf = ilo_buffer(cso->buffer); + info.offset = cso->buffer_offset; + info.size = info.buf->bo_size; + + info.stride = cso->stride; + + vec->vb.vb[i].bo = info.buf->bo; + } else { + memset(&info, 0, sizeof(info)); 
+ } + + ilo_state_vertex_buffer_set_info(&vec->vb.vb[i], dev, &info); + } +} + static void finalize_urb(struct ilo_context *ilo) { @@ -728,6 +761,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo, finalize_constant_buffers(ilo); finalize_index_buffer(ilo); finalize_vertex_elements(ilo); + finalize_vertex_buffers(ilo); finalize_urb(ilo); finalize_rasterizer(ilo); @@ -1366,8 +1400,6 @@ ilo_create_vertex_elements_state(struct pipe_context *pipe, attr->format_size = util_format_get_blocksize(elem->src_format); attr->component_count = util_format_get_nr_components(elem->src_format); attr->is_integer = util_format_is_pure_integer(elem->src_format); - attr->is_double = (util_format_is_float(elem->src_format) && - attr->format_size == attr->component_count * 8); attr->instancing_enable = (elem->instance_divisor != 0); attr->instancing_step_rate = elem->instance_divisor; diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 66c671a01e5..2b3147fc355 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -157,6 +157,7 @@ struct ilo_ve_state { struct ilo_vb_state { struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS]; + struct ilo_state_vertex_buffer vb[PIPE_MAX_ATTRIBS]; uint32_t enabled_mask; }; From 9904e647cca0a15c80557ed7bcc6893faf147436 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 19 Jun 2015 15:10:02 +0800 Subject: [PATCH 734/834] ilo: add ilo_state_index_buffer It serves the same purpose as ilo_state_vertex_buffer does. 
--- .../drivers/ilo/core/ilo_builder_3d_top.h | 95 +++++----------- src/gallium/drivers/ilo/core/ilo_state_vf.c | 104 ++++++++++++++++++ src/gallium/drivers/ilo/core/ilo_state_vf.h | 22 ++++ src/gallium/drivers/ilo/ilo_render_gen6.c | 4 +- src/gallium/drivers/ilo/ilo_render_gen8.c | 2 +- src/gallium/drivers/ilo/ilo_state.c | 13 +++ src/gallium/drivers/ilo/ilo_state.h | 1 + 7 files changed, 171 insertions(+), 70 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index bb20c7fd87a..6a45d701f1d 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -437,102 +437,63 @@ gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, static inline void gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, const struct ilo_state_vf *vf, - const struct ilo_ib_state *ib) + const struct ilo_state_index_buffer *ib) { const uint8_t cmd_len = 3; - struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); - uint32_t start_offset, end_offset; - enum gen_index_format format; - uint32_t *dw; + uint32_t dw0, *dw; unsigned pos; ILO_DEV_ASSERT(builder->dev, 6, 7.5); - if (!buf) - return; - - switch (ib->hw_index_size) { - case 4: - format = GEN6_INDEX_DWORD; - break; - case 2: - format = GEN6_INDEX_WORD; - break; - case 1: - format = GEN6_INDEX_BYTE; - break; - default: - assert(!"unknown index size"); - format = GEN6_INDEX_BYTE; - break; - } + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2) | + builder->mocs << GEN6_IB_DW0_MOCS__SHIFT; /* - * set start_offset to 0 here and adjust pipe_draw_info::start with - * ib->draw_start_offset in 3DPRIMITIVE + * see index_buffer_set_gen8_3DSTATE_INDEX_BUFFER() and + * vf_params_set_gen6_3dstate_index_buffer() */ - start_offset = 0; - end_offset = buf->bo_size; - - /* end_offset must also be aligned and is inclusive */ - end_offset -= (end_offset % ib->hw_index_size); - end_offset--; + dw0 |= 
ib->ib[0]; + if (ilo_dev_gen(builder->dev) <= ILO_GEN(7)) + dw0 |= vf->cut[0]; pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2) | - builder->mocs << GEN6_IB_DW0_MOCS__SHIFT | - format << GEN6_IB_DW0_FORMAT__SHIFT; - - /* see vf_params_set_gen6_3dstate_index_buffer() */ - if (ilo_dev_gen(builder->dev) <= ILO_GEN(7)) - dw[0] |= vf->cut[0]; - - ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); - ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); + dw[0] = dw0; + if (ib->need_bo) { + ilo_builder_batch_reloc(builder, pos + 1, ib->bo, ib->ib[1], 0); + ilo_builder_batch_reloc(builder, pos + 2, ib->bo, ib->ib[2], 0); + } else { + dw[1] = 0; + dw[2] = 0; + } } static inline void gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, const struct ilo_state_vf *vf, - const struct ilo_ib_state *ib) + const struct ilo_state_index_buffer *ib) { const uint8_t cmd_len = 5; - struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); - int format; uint32_t *dw; unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - if (!buf) - return; - - switch (ib->hw_index_size) { - case 4: - format = GEN6_INDEX_DWORD; - break; - case 2: - format = GEN6_INDEX_WORD; - break; - case 1: - format = GEN6_INDEX_BYTE; - break; - default: - assert(!"unknown index size"); - format = GEN6_INDEX_BYTE; - break; - } - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2); - dw[1] = format << GEN8_IB_DW1_FORMAT__SHIFT | + /* see index_buffer_set_gen8_3DSTATE_INDEX_BUFFER() */ + dw[1] = ib->ib[0] | builder->mocs << GEN8_IB_DW1_MOCS__SHIFT; - dw[4] = buf->bo_size; - /* ignore ib->offset here in favor of adjusting 3DPRIMITIVE */ - ilo_builder_batch_reloc64(builder, pos + 2, buf->bo, 0, 0); + if (ib->need_bo) { + ilo_builder_batch_reloc64(builder, pos + 2, ib->bo, ib->ib[1], 0); + } else { + dw[2] = 0; + dw[3] = 0; + } + + dw[4] = ib->ib[2]; } 
static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c index 92e0380e5c3..09e0f7f2293 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.c +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c @@ -555,6 +555,93 @@ vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb, return true; } +static uint32_t +get_index_format_size(enum gen_index_format format) +{ + switch (format) { + case GEN6_INDEX_BYTE: return 1; + case GEN6_INDEX_WORD: return 2; + case GEN6_INDEX_DWORD: return 4; + default: + assert(!"unknown index format"); + return 1; + } +} + +static bool +index_buffer_validate_gen6(const struct ilo_dev *dev, + const struct ilo_state_index_buffer_info *info) +{ + const uint32_t format_size = get_index_format_size(info->format); + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 79: + * + * "This field (Buffer Starting Address) contains the size-aligned (as + * specified by Index Format) Graphics Address of the first element of + * interest within the index buffer." + */ + assert(info->offset % format_size == 0); + + if (info->buf) + assert(info->offset < info->buf->bo_size && info->size); + + return true; +} + +static uint32_t +index_buffer_get_gen6_size(const struct ilo_dev *dev, + const struct ilo_state_index_buffer_info *info) +{ + uint32_t size; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!info->buf) + return 0; + + size = (info->offset + info->size <= info->buf->bo_size) ? 
info->size : + info->buf->bo_size - info->offset; + + if (ilo_dev_gen(dev) < ILO_GEN(8)) { + const uint32_t format_size = get_index_format_size(info->format); + size -= (size % format_size); + } + + return size; +} + +static bool +index_buffer_set_gen8_3DSTATE_INDEX_BUFFER(struct ilo_state_index_buffer *ib, + const struct ilo_dev *dev, + const struct ilo_state_index_buffer_info *info) +{ + const uint32_t size = index_buffer_get_gen6_size(dev, info); + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!index_buffer_validate_gen6(dev, info)) + return false; + + STATIC_ASSERT(ARRAY_SIZE(ib->ib) >= 3); + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + ib->ib[0] = info->format << GEN8_IB_DW1_FORMAT__SHIFT; + ib->ib[1] = info->offset; + ib->ib[2] = size; + } else { + ib->ib[0] = info->format << GEN6_IB_DW0_FORMAT__SHIFT; + ib->ib[1] = info->offset; + /* address of the last valid byte, or 0 */ + ib->ib[2] = (size) ? info->offset + size - 1 : 0; + } + + ib->need_bo = (info->buf != NULL); + + return true; +} + bool ilo_state_vf_init(struct ilo_state_vf *vf, const struct ilo_dev *dev, @@ -752,3 +839,20 @@ ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb, return ret; } + +/** + * No need to initialize first. 
+ */ +bool +ilo_state_index_buffer_set_info(struct ilo_state_index_buffer *ib, + const struct ilo_dev *dev, + const struct ilo_state_index_buffer_info *info) +{ + bool ret = true; + + ret &= index_buffer_set_gen8_3DSTATE_INDEX_BUFFER(ib, dev, info); + + assert(ret); + + return ret; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h index 488f92f9b40..39750d8aafe 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.h +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h @@ -149,6 +149,23 @@ struct ilo_state_vertex_buffer { struct intel_bo *bo; }; +struct ilo_state_index_buffer_info { + const struct ilo_buffer *buf; + uint32_t offset; + uint32_t size; + + enum gen_index_format format; +}; + +struct ilo_state_index_buffer { + uint32_t ib[3]; + + bool need_bo; + + /* managed by users */ + struct intel_bo *bo; +}; + static inline size_t ilo_state_vf_data_size(const struct ilo_dev *dev, uint8_t element_count) { @@ -199,4 +216,9 @@ ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb, const struct ilo_dev *dev, const struct ilo_state_vertex_buffer_info *info); +bool +ilo_state_index_buffer_set_info(struct ilo_state_index_buffer *ib, + const struct ilo_dev *dev, + const struct ilo_state_index_buffer_info *info); + #endif /* ILO_STATE_VF_H */ diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 8415b136002..0623714c19f 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -415,7 +415,7 @@ gen6_draw_vf(struct ilo_render *r, /* 3DSTATE_INDEX_BUFFER */ if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) || DIRTY(IB) || r->batch_bo_changed) - gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib); + gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib.ib); /* 3DSTATE_VF */ if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF) @@ -424,7 +424,7 @@ gen6_draw_vf(struct ilo_render *r, 
/* 3DSTATE_INDEX_BUFFER */ if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) || DIRTY(IB) || r->batch_bo_changed) - gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib); + gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib.ib); } /* 3DSTATE_VERTEX_BUFFERS */ diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 4c1c08bbd25..8956e5f6b2e 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -202,7 +202,7 @@ gen8_draw_vf(struct ilo_render *r, /* 3DSTATE_INDEX_BUFFER */ if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) || DIRTY(IB) || r->batch_bo_changed) - gen8_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib); + gen8_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib.ib); /* 3DSTATE_VF */ if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF) diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index e24f8fa316f..966a6e0470c 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -379,11 +379,13 @@ finalize_constant_buffers(struct ilo_context *ilo) static void finalize_index_buffer(struct ilo_context *ilo) { + const struct ilo_dev *dev = ilo->dev; struct ilo_state_vector *vec = &ilo->state_vector; const bool need_upload = (vec->draw->indexed && (vec->ib.state.user_buffer || vec->ib.state.offset % vec->ib.state.index_size)); struct pipe_resource *current_hw_res = NULL; + struct ilo_state_index_buffer_info info; if (!(vec->dirty & ILO_DIRTY_IB) && !need_upload) return; @@ -435,6 +437,17 @@ finalize_index_buffer(struct ilo_context *ilo) vec->ib.hw_index_size = vec->ib.state.index_size; pipe_resource_reference(&current_hw_res, NULL); + + memset(&info, 0, sizeof(info)); + if (vec->ib.hw_resource) { + info.buf = ilo_buffer(vec->ib.hw_resource); + info.size = info.buf->bo_size; + info.format = ilo_translate_index_size(vec->ib.hw_index_size); + +
vec->ib.ib.bo = info.buf->bo; + } + + ilo_state_index_buffer_set_info(&vec->ib.ib, dev, &info); } static void diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 2b3147fc355..d990269171b 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -167,6 +167,7 @@ struct ilo_ib_state { /* these are not valid until the state is finalized */ struct pipe_resource *hw_resource; unsigned hw_index_size; + struct ilo_state_index_buffer ib; /* an offset to be added to pipe_draw_info::start */ int64_t draw_start_offset; }; From e3372c4bfb8d5960714651ca7d3f1acc0018a8fa Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 15 Jun 2015 15:17:45 +0800 Subject: [PATCH 735/834] ilo: add ilo_state_sol_buffer It serves the same purpose as ilo_state_vertex_buffer does. --- .../drivers/ilo/core/ilo_builder_3d_top.h | 103 +++++---- src/gallium/drivers/ilo/core/ilo_state_sol.c | 210 +++++++++++++++--- src/gallium/drivers/ilo/core/ilo_state_sol.h | 42 +++- src/gallium/drivers/ilo/ilo_render_gen7.c | 20 +- src/gallium/drivers/ilo/ilo_state.c | 33 ++- src/gallium/drivers/ilo/ilo_state.h | 8 + 6 files changed, 315 insertions(+), 101 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 6a45d701f1d..398586bbad8 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -756,11 +756,13 @@ gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder, dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (cmd_len - 2); /* see sol_set_gen7_3DSTATE_STREAMOUT() */ - dw[1] = sol->so[0]; - dw[2] = sol->so[1]; + dw[1] = sol->streamout[0]; + dw[2] = sol->streamout[1]; if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { - dw[3] = sol->so[2]; - dw[4] = sol->so[3]; + dw[3] = sol->strides[1] << GEN8_SO_DW3_BUFFER1_PITCH__SHIFT | + sol->strides[0] << GEN8_SO_DW3_BUFFER0_PITCH__SHIFT; + dw[4] = sol->strides[3] << 
GEN8_SO_DW4_BUFFER3_PITCH__SHIFT | + sol->strides[2] << GEN8_SO_DW4_BUFFER2_PITCH__SHIFT; } } @@ -797,8 +799,8 @@ gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder, dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2); /* see sol_set_gen7_3DSTATE_SO_DECL_LIST() */ - dw[1] = sol->so[4]; - dw[2] = sol->so[5]; + dw[1] = sol->so_decl[0]; + dw[2] = sol->so_decl[1]; memcpy(&dw[3], sol->decl, sizeof(sol->decl[0]) * sol->decl_count); if (sol->decl_count < cmd_decl_count) { @@ -808,74 +810,77 @@ gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder, } static inline void -gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index, int stride, - const struct pipe_stream_output_target *so_target) +gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, + const struct ilo_state_sol *sol, + const struct ilo_state_sol_buffer *sb, + uint8_t buffer) { - const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 4; - struct ilo_buffer *buf; - int start, end; + const uint8_t cmd_len = 4; uint32_t *dw; unsigned pos; - ILO_DEV_ASSERT(builder->dev, 7, 8); + ILO_DEV_ASSERT(builder->dev, 7, 7.5); - buf = ilo_buffer(so_target->buffer); - - /* DWord-aligned */ - assert(stride % 4 == 0); - assert(so_target->buffer_offset % 4 == 0); - - stride &= ~3; - start = so_target->buffer_offset & ~3; - end = (start + so_target->buffer_size) & ~3; + assert(buffer < ILO_STATE_SOL_MAX_BUFFER_COUNT); pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | (cmd_len - 2); - dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT | - stride; + /* see sol_buffer_set_gen7_3dstate_so_buffer() */ + dw[1] = buffer << GEN7_SO_BUF_DW1_INDEX__SHIFT | + builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT | + sol->strides[buffer] << GEN7_SO_BUF_DW1_PITCH__SHIFT; - if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { - dw[1] |= builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT; - - dw[4] = end - start; - dw[5] = 0; - dw[6] = 0; - dw[7] = 0; - - 
ilo_builder_batch_reloc64(builder, pos + 2, - buf->bo, start, INTEL_RELOC_WRITE); + if (sb->need_bo) { + ilo_builder_batch_reloc(builder, pos + 2, sb->bo, + sb->so_buf[0], INTEL_RELOC_WRITE); + ilo_builder_batch_reloc(builder, pos + 3, sb->bo, + sb->so_buf[1], INTEL_RELOC_WRITE); } else { - dw[1] |= builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT; - - ilo_builder_batch_reloc(builder, pos + 2, - buf->bo, start, INTEL_RELOC_WRITE); - ilo_builder_batch_reloc(builder, pos + 3, - buf->bo, end, INTEL_RELOC_WRITE); + dw[2] = 0; + dw[3] = 0; } } static inline void -gen7_disable_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index) +gen8_3DSTATE_SO_BUFFER(struct ilo_builder *builder, + const struct ilo_state_sol *sol, + const struct ilo_state_sol_buffer *sb, + uint8_t buffer) { - const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 4; + const uint8_t cmd_len = 8; uint32_t *dw; + unsigned pos; - ILO_DEV_ASSERT(builder->dev, 7, 8); + ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | (cmd_len - 2); - dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT; - dw[2] = 0; - dw[3] = 0; + /* see sol_buffer_set_gen8_3dstate_so_buffer() */ + dw[1] = sb->so_buf[0] | + buffer << GEN7_SO_BUF_DW1_INDEX__SHIFT | + builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT; - if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { - dw[4] = 0; + if (sb->need_bo) { + ilo_builder_batch_reloc64(builder, pos + 2, sb->bo, + sb->so_buf[1], INTEL_RELOC_WRITE); + } else { + dw[2] = 0; + dw[3] = 0; + } + + dw[4] = sb->so_buf[2]; + + if (sb->need_write_offset_bo) { + ilo_builder_batch_reloc64(builder, pos + 5, sb->write_offset_bo, + sizeof(uint32_t) * buffer, INTEL_RELOC_WRITE); + } else { dw[5] = 0; dw[6] = 0; - dw[7] = 0; } + + dw[7] = sb->so_buf[3]; } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.c 
b/src/gallium/drivers/ilo/core/ilo_state_sol.c index dbc4b894f6a..38c0b719ab3 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_sol.c +++ b/src/gallium/drivers/ilo/core/ilo_state_sol.c @@ -26,6 +26,7 @@ */ #include "ilo_debug.h" +#include "ilo_buffer.h" #include "ilo_state_sol.h" static bool @@ -134,7 +135,7 @@ sol_validate_gen7(const struct ilo_dev *dev, } static bool -sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *so, +sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *sol, const struct ilo_dev *dev, const struct ilo_state_sol_info *info) { @@ -176,12 +177,10 @@ sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *so, dw1 |= GEN7_SO_DW1_STATISTICS; if (ilo_dev_gen(dev) < ILO_GEN(8)) { - const uint8_t buffer_enables = - ((bool) info->buffer_strides[3]) << 3 | - ((bool) info->buffer_strides[2]) << 2 | - ((bool) info->buffer_strides[1]) << 1 | - ((bool) info->buffer_strides[0]); - + const uint8_t buffer_enables = ((bool) info->buffer_strides[3]) << 3 | + ((bool) info->buffer_strides[2]) << 2 | + ((bool) info->buffer_strides[1]) << 1 | + ((bool) info->buffer_strides[0]); dw1 |= buffer_enables << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT; } @@ -194,27 +193,17 @@ sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *so, vue_read[0].offset << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT | vue_read[0].len << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT; - STATIC_ASSERT(ARRAY_SIZE(so->so) >= 4); - so->so[0] = dw1; - so->so[1] = dw2; + STATIC_ASSERT(ARRAY_SIZE(sol->streamout) >= 2); + sol->streamout[0] = dw1; + sol->streamout[1] = dw2; - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - uint32_t dw3, dw4; - - dw3 = info->buffer_strides[1] << GEN8_SO_DW3_BUFFER1_PITCH__SHIFT | - info->buffer_strides[0] << GEN8_SO_DW3_BUFFER0_PITCH__SHIFT; - dw4 = info->buffer_strides[3] << GEN8_SO_DW4_BUFFER3_PITCH__SHIFT | - info->buffer_strides[2] << GEN8_SO_DW4_BUFFER2_PITCH__SHIFT; - - so->so[2] = dw3; - so->so[3] = dw4; - } + memcpy(sol->strides, info->buffer_strides, sizeof(sol->strides)); return true; } 
static bool -sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol *so, +sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol *sol, const struct ilo_dev *dev, const struct ilo_state_sol_info *info, uint8_t max_decl_count) @@ -264,25 +253,146 @@ sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol *so, decl_counts[1] << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT | decl_counts[0] << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT; - STATIC_ASSERT(ARRAY_SIZE(so->so) >= 6); - so->so[4] = dw1; - so->so[5] = dw2; + STATIC_ASSERT(ARRAY_SIZE(sol->so_decl) >= 2); + sol->so_decl[0] = dw1; + sol->so_decl[1] = dw2; - STATIC_ASSERT(ARRAY_SIZE(so->decl[0]) == 2); - memcpy(so->decl, decl_list, sizeof(so->decl[0]) * max_decl_count); - so->decl_count = max_decl_count; + STATIC_ASSERT(ARRAY_SIZE(sol->decl[0]) == 2); + memcpy(sol->decl, decl_list, sizeof(sol->decl[0]) * max_decl_count); + sol->decl_count = max_decl_count; + + return true; +} + +static bool +sol_buffer_validate_gen7(const struct ilo_dev *dev, + const struct ilo_state_sol_buffer_info *info) +{ + ILO_DEV_ASSERT(dev, 7, 8); + + if (info->buf) + assert(info->offset < info->buf->bo_size && info->size); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 208: + * + * "(Surface Base Address) This field specifies the starting DWord + * address..." + */ + assert(info->offset % 4 == 0); + + /* Gen8+ only */ + if (info->write_offset_load || info->write_offset_save) + assert(ilo_dev_gen(dev) >= ILO_GEN(8)); + + /* + * From the Broadwell PRM, volume 2b, page 206: + * + * "This field (Stream Offset) specifies the Offset in stream output + * buffer to start at, or whether to append to the end of an existing + * buffer. The Offset must be DWORD aligned." 
+ */ + if (info->write_offset_imm_enable) { + assert(info->write_offset_load); + assert(info->write_offset_imm % 4 == 0); + } + + return true; +} + +static uint32_t +sol_buffer_get_gen6_size(const struct ilo_dev *dev, + const struct ilo_state_sol_buffer_info *info) +{ + uint32_t size; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!info->buf) + return 0; + + size = (info->offset + info->size <= info->buf->bo_size) ? info->size : + info->buf->bo_size - info->offset; + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 208: + * + * "(Surface End Address) This field specifies the ending DWord + * address..." + */ + size &= ~3; + + return size; +} + +static bool +sol_buffer_set_gen7_3dstate_so_buffer(struct ilo_state_sol_buffer *sb, + const struct ilo_dev *dev, + const struct ilo_state_sol_buffer_info *info) +{ + const uint32_t size = sol_buffer_get_gen6_size(dev, info); + + ILO_DEV_ASSERT(dev, 7, 7.5); + + if (!sol_buffer_validate_gen7(dev, info)) + return false; + + STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 2); + sb->so_buf[0] = info->offset; + sb->so_buf[1] = (size) ? info->offset + size : 0; + + return true; +} + +static bool +sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer *sb, + const struct ilo_dev *dev, + const struct ilo_state_sol_buffer_info *info) +{ + const uint32_t size = sol_buffer_get_gen6_size(dev, info); + uint32_t dw1; + + ILO_DEV_ASSERT(dev, 8, 8); + + if (!sol_buffer_validate_gen7(dev, info)) + return false; + + dw1 = 0; + + if (info->buf) + dw1 |= GEN8_SO_BUF_DW1_ENABLE; + if (info->write_offset_load) + dw1 |= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE; + if (info->write_offset_save) + dw1 |= GEN8_SO_BUF_DW1_OFFSET_ENABLE; + + STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 4); + sb->so_buf[0] = dw1; + sb->so_buf[1] = info->offset; + + /* + * From the Broadwell PRM, volume 2b, page 205: + * + * "This field (Surface Size) specifies the size of buffer in number + * DWords minus 1 of the buffer in Graphics Memory." + */ + sb->so_buf[2] = (size) ? 
size / 4 - 1 : 0; + + /* load from imm or sb->write_offset_bo */ + sb->so_buf[3] = (info->write_offset_imm_enable) ? + info->write_offset_imm : ~0u; return true; } bool -ilo_state_sol_init(struct ilo_state_sol *so, +ilo_state_sol_init(struct ilo_state_sol *sol, const struct ilo_dev *dev, const struct ilo_state_sol_info *info) { bool ret = true; - assert(ilo_is_zeroed(so, sizeof(*so))); + assert(ilo_is_zeroed(sol, sizeof(*sol))); assert(ilo_is_zeroed(info->data, info->data_size)); if (ilo_dev_gen(dev) >= ILO_GEN(7)) { @@ -295,10 +405,10 @@ ilo_state_sol_init(struct ilo_state_sol *so, } assert(ilo_state_sol_data_size(dev, max_decl_count) <= info->data_size); - so->decl = (uint32_t (*)[2]) info->data; + sol->decl = (uint32_t (*)[2]) info->data; - ret &= sol_set_gen7_3DSTATE_STREAMOUT(so, dev, info); - ret &= sol_set_gen7_3DSTATE_SO_DECL_LIST(so, dev, info, max_decl_count); + ret &= sol_set_gen7_3DSTATE_STREAMOUT(sol, dev, info); + ret &= sol_set_gen7_3DSTATE_SO_DECL_LIST(sol, dev, info, max_decl_count); } assert(ret); @@ -318,3 +428,37 @@ ilo_state_sol_init_disabled(struct ilo_state_sol *sol, return ilo_state_sol_init(sol, dev, &info); } + +bool +ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb, + const struct ilo_dev *dev, + const struct ilo_state_sol_buffer_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(sb, sizeof(*sb))); + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + ret &= sol_buffer_set_gen8_3dstate_so_buffer(sb, dev, info); + else + ret &= sol_buffer_set_gen7_3dstate_so_buffer(sb, dev, info); + + sb->need_bo = (info->size > 0); + sb->need_write_offset_bo = (info->write_offset_save || + (info->write_offset_load && !info->write_offset_imm_enable)); + + assert(ret); + + return ret; +} + +bool +ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer *sb, + const struct ilo_dev *dev) +{ + struct ilo_state_sol_buffer_info info; + + memset(&info, 0, sizeof(info)); + + return ilo_state_sol_buffer_init(sb, dev, &info); +} diff --git 
a/src/gallium/drivers/ilo/core/ilo_state_sol.h b/src/gallium/drivers/ilo/core/ilo_state_sol.h index c5c693e5e56..2513fcb4979 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_sol.h +++ b/src/gallium/drivers/ilo/core/ilo_state_sol.h @@ -99,12 +99,43 @@ struct ilo_state_sol_info { }; struct ilo_state_sol { - uint32_t so[6]; + uint32_t streamout[2]; + uint16_t strides[4]; + uint32_t so_decl[2]; uint32_t (*decl)[2]; uint8_t decl_count; }; +struct ilo_buffer; + +struct ilo_state_sol_buffer_info { + const struct ilo_buffer *buf; + uint32_t offset; + uint32_t size; + + /* + * Gen8+ only. When enabled, require a write offset bo of at least + * (sizeof(uint32_t) * ILO_STATE_SOL_MAX_BUFFER_COUNT) bytes + */ + bool write_offset_load; + bool write_offset_save; + + bool write_offset_imm_enable; + uint32_t write_offset_imm; +}; + +struct ilo_state_sol_buffer { + uint32_t so_buf[4]; + + bool need_bo; + bool need_write_offset_bo; + + /* managed by users */ + struct intel_bo *bo; + struct intel_bo *write_offset_bo; +}; + static inline size_t ilo_state_sol_data_size(const struct ilo_dev *dev, uint8_t max_decl_count) { @@ -123,4 +154,13 @@ ilo_state_sol_init_disabled(struct ilo_state_sol *sol, const struct ilo_dev *dev, bool render_disable); +bool +ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb, + const struct ilo_dev *dev, + const struct ilo_state_sol_buffer_info *info); + +bool +ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer *sb, + const struct ilo_dev *dev); + #endif /* ILO_STATE_SOL_H */ diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index e4d2bf064da..7d0e4c44829 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -431,19 +431,21 @@ gen7_draw_sol(struct ilo_render *r, /* 3DSTATE_SO_BUFFER */ if ((DIRTY(SO) || dirty_sh || r->batch_bo_changed) && vec->so.enabled) { - const struct pipe_stream_output_info *so_info; int i; - so_info = 
ilo_shader_get_kernel_so_info(shader); + for (i = 0; i < ILO_STATE_SOL_MAX_BUFFER_COUNT; i++) { + const struct pipe_stream_output_target *target = + (i < vec->so.count && vec->so.states[i]) ? + vec->so.states[i] : NULL; + const struct ilo_state_sol_buffer *sb = (target) ? + &((const struct ilo_stream_output_target *) target)->sb : + &vec->so.dummy_sb; - for (i = 0; i < vec->so.count; i++) { - const int stride = so_info->stride[i] * 4; /* in bytes */ - - gen7_3DSTATE_SO_BUFFER(r->builder, i, stride, vec->so.states[i]); + if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) + gen8_3DSTATE_SO_BUFFER(r->builder, sol, sb, i); + else + gen7_3DSTATE_SO_BUFFER(r->builder, sol, sb, i); } - - for (; i < 4; i++) - gen7_disable_3DSTATE_SO_BUFFER(r->builder, i); } /* 3DSTATE_SO_DECL_LIST */ diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 966a6e0470c..62e31809fb7 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -1899,19 +1899,28 @@ ilo_create_stream_output_target(struct pipe_context *pipe, unsigned buffer_offset, unsigned buffer_size) { - struct pipe_stream_output_target *target; + const struct ilo_dev *dev = ilo_context(pipe)->dev; + struct ilo_stream_output_target *target; + struct ilo_state_sol_buffer_info info; - target = MALLOC_STRUCT(pipe_stream_output_target); + target = CALLOC_STRUCT(ilo_stream_output_target); assert(target); - pipe_reference_init(&target->reference, 1); - target->buffer = NULL; - pipe_resource_reference(&target->buffer, res); - target->context = pipe; - target->buffer_offset = buffer_offset; - target->buffer_size = buffer_size; + pipe_reference_init(&target->base.reference, 1); + pipe_resource_reference(&target->base.buffer, res); + target->base.context = pipe; + target->base.buffer_offset = buffer_offset; + target->base.buffer_size = buffer_size; - return target; + memset(&info, 0, sizeof(info)); + info.buf = ilo_buffer(res); + info.offset = buffer_offset; + info.size = 
buffer_size; + + ilo_state_sol_buffer_init(&target->sb, dev, &info); + target->sb.bo = info.buf->bo; + + return &target->base; } static void @@ -2338,6 +2347,8 @@ ilo_state_vector_init(const struct ilo_dev *dev, ilo_state_ds_init_disabled(&vec->disabled_ds, dev); ilo_state_gs_init_disabled(&vec->disabled_gs, dev); + ilo_state_sol_buffer_init_disabled(&vec->so.dummy_sb, dev); + ilo_state_surface_init_for_null(&vec->fb.null_rt, dev); ilo_state_zs_init_for_null(&vec->fb.null_zs, dev); @@ -2439,6 +2450,10 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec, for (i = 0; i < vec->so.count; i++) { if (vec->so.states[i]->buffer == res) { + struct ilo_stream_output_target *target = + (struct ilo_stream_output_target *) vec->so.states[i]; + + target->sb.bo = ilo_buffer(res)->bo; states |= ILO_DIRTY_SO; break; } diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index d990269171b..e4746d0969b 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -217,11 +217,19 @@ struct ilo_view_state { unsigned count; }; +struct ilo_stream_output_target { + struct pipe_stream_output_target base; + + struct ilo_state_sol_buffer sb; +}; + struct ilo_so_state { struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS]; unsigned count; unsigned append_bitmask; + struct ilo_state_sol_buffer dummy_sb; + bool enabled; }; From dcb5bad3a3a8ff116c32ecb01827ea8461fa2baa Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 18 Jun 2015 22:47:20 +0800 Subject: [PATCH 736/834] ilo: move gen6_so_SURFACE_STATE() out of core It does not belong to core. 
--- .../drivers/ilo/core/ilo_builder_3d_top.h | 52 ------------------ src/gallium/drivers/ilo/ilo_render_surface.c | 53 +++++++++++++++++++ 2 files changed, 53 insertions(+), 52 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 398586bbad8..a8873ed5e2f 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -1316,58 +1316,6 @@ gen6_SURFACE_STATE(struct ilo_builder *builder, return state_offset; } -static inline uint32_t -gen6_so_SURFACE_STATE(struct ilo_builder *builder, - const struct pipe_stream_output_target *so, - const struct pipe_stream_output_info *so_info, - int so_index) -{ - struct ilo_buffer *buf = ilo_buffer(so->buffer); - struct ilo_state_surface_buffer_info info; - struct ilo_state_surface surf; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - memset(&info, 0, sizeof(info)); - info.buf = buf; - info.access = ILO_STATE_SURFACE_ACCESS_DP_SVB; - - switch (so_info->output[so_index].num_components) { - case 1: - info.format = GEN6_FORMAT_R32_FLOAT; - info.format_size = 4; - break; - case 2: - info.format = GEN6_FORMAT_R32G32_FLOAT; - info.format_size = 8; - break; - case 3: - info.format = GEN6_FORMAT_R32G32B32_FLOAT; - info.format_size = 12; - break; - case 4: - info.format = GEN6_FORMAT_R32G32B32A32_FLOAT; - info.format_size = 16; - break; - default: - assert(!"unexpected SO components length"); - info.format = GEN6_FORMAT_R32_FLOAT; - info.format_size = 4; - break; - } - - info.struct_size = - so_info->stride[so_info->output[so_index].output_buffer] * 4; - info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; - info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4; - - memset(&surf, 0, sizeof(surf)); - ilo_state_surface_init_for_buffer(&surf, builder->dev, &info); - surf.bo = info.buf->bo; - - return gen6_SURFACE_STATE(builder, &surf); -} - static inline uint32_t 
gen6_SAMPLER_STATE(struct ilo_builder *builder, const struct ilo_state_sampler *samplers, diff --git a/src/gallium/drivers/ilo/ilo_render_surface.c b/src/gallium/drivers/ilo/ilo_render_surface.c index bbdd5fe7a0a..ad053564294 100644 --- a/src/gallium/drivers/ilo/ilo_render_surface.c +++ b/src/gallium/drivers/ilo/ilo_render_surface.c @@ -29,12 +29,65 @@ #include "ilo_common.h" #include "ilo_blitter.h" +#include "ilo_resource.h" #include "ilo_shader.h" #include "ilo_state.h" #include "ilo_render_gen.h" #define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state) +static inline uint32_t +gen6_so_SURFACE_STATE(struct ilo_builder *builder, + const struct pipe_stream_output_target *so, + const struct pipe_stream_output_info *so_info, + int so_index) +{ + struct ilo_buffer *buf = ilo_buffer(so->buffer); + struct ilo_state_surface_buffer_info info; + struct ilo_state_surface surf; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + memset(&info, 0, sizeof(info)); + info.buf = buf; + info.access = ILO_STATE_SURFACE_ACCESS_DP_SVB; + + switch (so_info->output[so_index].num_components) { + case 1: + info.format = GEN6_FORMAT_R32_FLOAT; + info.format_size = 4; + break; + case 2: + info.format = GEN6_FORMAT_R32G32_FLOAT; + info.format_size = 8; + break; + case 3: + info.format = GEN6_FORMAT_R32G32B32_FLOAT; + info.format_size = 12; + break; + case 4: + info.format = GEN6_FORMAT_R32G32B32A32_FLOAT; + info.format_size = 16; + break; + default: + assert(!"unexpected SO components length"); + info.format = GEN6_FORMAT_R32_FLOAT; + info.format_size = 4; + break; + } + + info.struct_size = + so_info->stride[so_info->output[so_index].output_buffer] * 4; + info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; + info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4; + + memset(&surf, 0, sizeof(surf)); + ilo_state_surface_init_for_buffer(&surf, builder->dev, &info); + surf.bo = info.buf->bo; + + return gen6_SURFACE_STATE(builder, &surf); +} + static void 
gen6_emit_draw_surface_rt(struct ilo_render *r, const struct ilo_state_vector *vec, From 244caba2502402b93876cb89952ac05e6d87c5b2 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 20 Jun 2015 00:34:29 +0800 Subject: [PATCH 737/834] ilo: avoid ilo_ib_state in genX_3DPRIMITIVE() ilo_ib_state is not in core. --- src/gallium/drivers/ilo/core/ilo_builder_3d.h | 12 ++++-------- src/gallium/drivers/ilo/ilo_render_gen.h | 6 ++++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d.h b/src/gallium/drivers/ilo/core/ilo_builder_3d.h index 6cf1732ee1c..8d8a79599bd 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d.h @@ -38,14 +38,12 @@ static inline void gen6_3DPRIMITIVE(struct ilo_builder *builder, const struct pipe_draw_info *info, - const struct ilo_ib_state *ib) + int64_t start_offset) { const uint8_t cmd_len = 6; const int prim = gen6_3d_translate_pipe_prim(info->mode); const int vb_access = (info->indexed) ? GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL; - const uint32_t vb_start = info->start + - ((info->indexed) ? ib->draw_start_offset : 0); uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 6); @@ -57,7 +55,7 @@ gen6_3DPRIMITIVE(struct ilo_builder *builder, prim << GEN6_3DPRIM_DW0_TYPE__SHIFT | (cmd_len - 2); dw[1] = info->count; - dw[2] = vb_start; + dw[2] = info->start + start_offset; dw[3] = info->instance_count; dw[4] = info->start_instance; dw[5] = info->index_bias; @@ -66,14 +64,12 @@ gen6_3DPRIMITIVE(struct ilo_builder *builder, static inline void gen7_3DPRIMITIVE(struct ilo_builder *builder, const struct pipe_draw_info *info, - const struct ilo_ib_state *ib) + int64_t start_offset) { const uint8_t cmd_len = 7; const int prim = gen6_3d_translate_pipe_prim(info->mode); const int vb_access = (info->indexed) ? 
GEN7_3DPRIM_DW1_ACCESS_RANDOM : GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL; - const uint32_t vb_start = info->start + - ((info->indexed) ? ib->draw_start_offset : 0); uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 8); @@ -83,7 +79,7 @@ gen7_3DPRIMITIVE(struct ilo_builder *builder, dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2); dw[1] = vb_access | prim; dw[2] = info->count; - dw[3] = vb_start; + dw[3] = info->start + start_offset; dw[4] = info->instance_count; dw[5] = info->start_instance; dw[6] = info->index_bias; diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index 6bbc0a8e3f1..00c8113a45d 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -389,6 +389,8 @@ ilo_render_3dprimitive(struct ilo_render *r, const struct pipe_draw_info *info, const struct ilo_ib_state *ib) { + const int64_t start_offset = (info->indexed) ? ib->draw_start_offset : 0; + ILO_DEV_ASSERT(r->dev, 6, 8); if (r->state.deferred_pipe_control_dw1) @@ -396,9 +398,9 @@ ilo_render_3dprimitive(struct ilo_render *r, /* 3DPRIMITIVE */ if (ilo_dev_gen(r->dev) >= ILO_GEN(7)) - gen7_3DPRIMITIVE(r->builder, info, ib); + gen7_3DPRIMITIVE(r->builder, info, start_offset); else - gen6_3DPRIMITIVE(r->builder, info, ib); + gen6_3DPRIMITIVE(r->builder, info, start_offset); r->state.current_pipe_control_dw1 = 0; assert(!r->state.deferred_pipe_control_dw1); From 028590cbc758e877b963ba430f0a0cb49e882a6b Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 18 Jun 2015 22:48:14 +0800 Subject: [PATCH 738/834] ilo: clean up header includes Core is more self-contained now. 
--- src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h | 1 + src/gallium/drivers/ilo/core/ilo_builder_3d_top.h | 3 +-- src/gallium/drivers/ilo/ilo_render_gen6.c | 1 + src/gallium/drivers/ilo/ilo_render_gen7.c | 1 + src/gallium/drivers/ilo/ilo_render_gen8.c | 1 + 5 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index f7f95f493b7..6d9e3699125 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -38,6 +38,7 @@ #include "ilo_state_sbe.h" #include "ilo_state_shader.h" #include "ilo_state_viewport.h" +#include "ilo_state_zs.h" #include "ilo_builder.h" #include "ilo_builder_3d_top.h" diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index a8873ed5e2f..42d171fc0d2 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -29,8 +29,6 @@ #define ILO_BUILDER_3D_TOP_H #include "genhw/genhw.h" -#include "../ilo_resource.h" -#include "../ilo_state.h" #include "intel_winsys.h" #include "ilo_core.h" @@ -38,6 +36,7 @@ #include "ilo_state_sampler.h" #include "ilo_state_shader.h" #include "ilo_state_sol.h" +#include "ilo_state_surface.h" #include "ilo_state_urb.h" #include "ilo_state_vf.h" #include "ilo_builder.h" diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 0623714c19f..b2bc2dcface 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -33,6 +33,7 @@ #include "ilo_blitter.h" #include "ilo_query.h" +#include "ilo_resource.h" #include "ilo_shader.h" #include "ilo_state.h" #include "ilo_render_gen.h" diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 7d0e4c44829..4c54edeeb96 100644 --- 
a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -30,6 +30,7 @@ #include "core/ilo_builder_render.h" #include "ilo_blitter.h" +#include "ilo_resource.h" #include "ilo_shader.h" #include "ilo_state.h" #include "ilo_render_gen.h" diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 8956e5f6b2e..f86871f852f 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -30,6 +30,7 @@ #include "core/ilo_builder_render.h" #include "ilo_blitter.h" +#include "ilo_resource.h" #include "ilo_shader.h" #include "ilo_state.h" #include "ilo_render_gen.h" From bb107110a4d97191841985076dd9f2fbd0937dfc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 19 Jun 2015 19:41:25 -0700 Subject: [PATCH 739/834] vc4: Fix write-only texsubimage when we had to align. We need to make sure that when we store the aligned box, we've got initialized contents in the border. We could potentially just load the border area, but for now let's get text rendering working in X (and fix the GL_TEXTURE_2D errors in piglit's texsubimage test and gl-2.1-pbo/test_tex_image) --- src/gallium/drivers/vc4/vc4_resource.c | 6 +++++- src/mesa/main/extensions.c | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 14b135e2f45..cab76406055 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -162,6 +162,8 @@ vc4_resource_transfer_map(struct pipe_context *pctx, /* We need to align the box to utile boundaries, since that's * what load/store operate on. 
*/ + uint32_t orig_width = ptrans->box.width; + uint32_t orig_height = ptrans->box.height; uint32_t box_start_x = ptrans->box.x & (utile_w - 1); uint32_t box_start_y = ptrans->box.y & (utile_h - 1); ptrans->box.width += box_start_x; @@ -175,7 +177,9 @@ vc4_resource_transfer_map(struct pipe_context *pctx, ptrans->layer_stride = ptrans->stride; trans->map = malloc(ptrans->stride * ptrans->box.height); - if (usage & PIPE_TRANSFER_READ) { + if (usage & PIPE_TRANSFER_READ || + ptrans->box.width != orig_width || + ptrans->box.height != orig_height) { vc4_load_tiled_image(trans->map, ptrans->stride, buf + slice->offset + box->z * rsc->cube_map_stride, diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index 4176a69ed7c..b747abaf684 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -218,6 +218,7 @@ static const struct extension extension_table[] = { { "GL_EXT_discard_framebuffer", o(dummy_true), ES1 | ES2, 2009 }, { "GL_EXT_blend_minmax", o(EXT_blend_minmax), GLL | ES1 | ES2, 1995 }, { "GL_EXT_blend_subtract", o(dummy_true), GLL, 1995 }, + { "GL_EXT_buffer_storage", o(ARB_buffer_storage), ES2, 2015 }, { "GL_EXT_compiled_vertex_array", o(dummy_true), GLL, 1996 }, { "GL_EXT_copy_texture", o(dummy_true), GLL, 1995 }, { "GL_EXT_depth_bounds_test", o(EXT_depth_bounds_test), GL, 2002 }, From c00903867417f1522047b7c50ea9248e1aa2f50c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 19 Jun 2015 19:47:44 -0700 Subject: [PATCH 740/834] vc4: Use a defined t value for 1D textures. This doesn't fix the broken 1D cases of texsubimage, but it does prevent segfaulting when dumping the QIR code generated in fbo-1d. 
--- src/gallium/drivers/vc4/vc4_program.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index bb45eb1288e..ba47c51d9bd 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -325,7 +325,9 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) switch (instr->src[i].src_type) { case nir_tex_src_coord: s = ntq_get_src(c, instr->src[i].src, 0); - if (instr->sampler_dim != GLSL_SAMPLER_DIM_1D) + if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) + t = qir_uniform_f(c, 0.5); + else t = ntq_get_src(c, instr->src[i].src, 1); if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) r = ntq_get_src(c, instr->src[i].src, 2); From 97caf2054f6ebd3106ed22ef73622483ef193bf7 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sat, 20 Jun 2015 15:14:45 +0100 Subject: [PATCH 741/834] Add release notes for the 10.5.8 release Signed-off-by: Emil Velikov (cherry picked from commit 24b043aab73ce066ded6e4bc93f589008dfc8484) --- docs/relnotes/10.5.8.html | 111 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 docs/relnotes/10.5.8.html diff --git a/docs/relnotes/10.5.8.html b/docs/relnotes/10.5.8.html new file mode 100644 index 00000000000..8c5c8abad0f --- /dev/null +++ b/docs/relnotes/10.5.8.html @@ -0,0 +1,111 @@ + + + + + Mesa Release Notes + + + + +

        +

        The Mesa 3D Graphics Library

        +
        + + +
        + +

        Mesa 10.5.8 Release Notes / June 20, 2015

        + +

        +Mesa 10.5.8 is a bug fix release which fixes bugs found since the 10.5.7 release. +

        +

        +Mesa 10.5.8 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is only available if requested at context creation +because compatibility contexts are not supported. +

        + + +

        SHA256 checksums

        +
        +TBD
        +
        + + +

        New features

        +

        None

        + +

        Bug fixes

        + +

        This list is likely incomplete.

        + +
          + +
        • Bug 90310 - Fails to build gallium_dri.so at linking stage with clang because of multiple redefinitions
        • + +
        • Bug 90347 - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)
        • + +
        • Bug 90520 - Register spilling clobbers registers used elsewhere in the shader
        • + +
        • Bug 90905 - mesa: Finish subdir-objects transition
        • + +
        + + +

        Changes

        + +

        Ben Widawsky (1):

        +
          +
        • i965: Disable compaction for EOT send messages
        • +
        + +

        Boyan Ding (1):

        +
          +
        • egl/x11: Set version of swrastLoader to 2
        • +
        + +

        Emil Velikov (2):

        +
          +
        • docs: Add sha256sums for the 10.5.7 release
        • +
        • Update version to 10.5.8
        • +
        + +

        Erik Faye-Lund (1):

        +
          +
        • mesa: build xmlconfig to a separate static library
        • +
        + +

        Francisco Jerez (1):

        +
          +
        • i965: Don't compact instructions with unmapped bits.
        • +
        + +

        Ilia Mirkin (3):

        +
          +
        • nvc0/ir: fix collection of first uses for texture barrier insertion
        • +
        • nv50,nvc0: clamp uniform size to 64k
        • +
        • nvc0/ir: can't have a join on a load with an indirect source
        • +
        + +

        Jason Ekstrand (1):

        +
          +
        • i965/fs: Don't let the EOT send message interfere with the MRF hack
        • +
        + +

        Marek Olšák (1):

        +
          +
        • egl: fix setting context flags
        • +
        + +

        Roland Scheidegger (1):

        +
          +
        • draw: (trivial) fix NULL pointer dereference
        • +
        + + +
        + + From aa28423bcc7cd6b5f2c5f9c8f1a385a79469a439 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sat, 20 Jun 2015 16:37:16 +0100 Subject: [PATCH 742/834] docs: Add sha256sums for the 10.5.8 release Signed-off-by: Emil Velikov (cherry picked from commit a81b1d5512f64ffca1c13a5937e7eb0de24713ae) --- docs/relnotes/10.5.8.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/10.5.8.html b/docs/relnotes/10.5.8.html index 8c5c8abad0f..6239400cdab 100644 --- a/docs/relnotes/10.5.8.html +++ b/docs/relnotes/10.5.8.html @@ -31,7 +31,8 @@ because compatibility contexts are not supported.

        SHA256 checksums

        -TBD
        +611ddcfa3c1bf13f7e6ccac785c8749c3b74c9a78452bac70f8372cf6b209aa0  mesa-10.5.8.tar.gz
        +2866b855c5299a4aed066338c77ff6467c389b2c30ada7647be8758663da2b54  mesa-10.5.8.tar.xz
         
        From 104bff037665075aa2b92964ad2895f45d9a5866 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sat, 20 Jun 2015 16:40:56 +0100 Subject: [PATCH 743/834] docs: add news item and link release notes for mesa 10.5.8 Signed-off-by: Emil Velikov --- docs/index.html | 6 ++++++ docs/relnotes.html | 1 + 2 files changed, 7 insertions(+) diff --git a/docs/index.html b/docs/index.html index 252242495b9..80c6e03e3f1 100644 --- a/docs/index.html +++ b/docs/index.html @@ -16,6 +16,12 @@

        News

        +

        June 20, 2015

        +

        +Mesa 10.5.8 is released. +This is a bug-fix release. +

        +

        June 14, 2015

        Mesa 10.6.0 is released. This is a new diff --git a/docs/relnotes.html b/docs/relnotes.html index a037b9684a2..5fd80025a39 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.

          +
        • 10.5.8 release notes
        • 10.6.0 release notes
        • 10.5.7 release notes
        • 10.5.6 release notes From 717376155d2082d7bf94122a1e1d383b39e0b070 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sat, 20 Jun 2015 15:02:50 -0700 Subject: [PATCH 744/834] mesa: Back out an accidental change I had in a VC4 commit. This was a hack as part of debugging some glamor-on-GLES2 behavior that ended up being an xserver bug. I suspect we can just flip this extension on for GLES2, but the spec says it requires 3.1. --- src/mesa/main/extensions.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index b747abaf684..4176a69ed7c 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -218,7 +218,6 @@ static const struct extension extension_table[] = { { "GL_EXT_discard_framebuffer", o(dummy_true), ES1 | ES2, 2009 }, { "GL_EXT_blend_minmax", o(EXT_blend_minmax), GLL | ES1 | ES2, 1995 }, { "GL_EXT_blend_subtract", o(dummy_true), GLL, 1995 }, - { "GL_EXT_buffer_storage", o(ARB_buffer_storage), ES2, 2015 }, { "GL_EXT_compiled_vertex_array", o(dummy_true), GLL, 1996 }, { "GL_EXT_copy_texture", o(dummy_true), GLL, 1995 }, { "GL_EXT_depth_bounds_test", o(EXT_depth_bounds_test), GL, 2002 }, From b13135e06671468d296a33abf4150060f2b2a061 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 10 Jun 2015 19:49:55 -0400 Subject: [PATCH 745/834] tgsi: update docs for SVIEW usage with TEX* instructions Based on mailing list discussion here: http://lists.freedesktop.org/archives/mesa-dev/2014-November/071583.html Signed-off-by: Rob Clark Reviewed-by: Roland Scheidegger Reviewed-by: Jose Fonseca --- src/gallium/docs/source/tgsi.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index f77702aa6a9..89ca172080e 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -2965,6 +2965,18 @@ resource can be one of BUFFER, 1D, 2D, 3D, 1DArray and 2DArray. 
type must be 1 or 4 entries (if specifying on a per-component level) out of UNORM, SNORM, SINT, UINT and FLOAT. +For TEX\* style texture sample opcodes (as opposed to SAMPLE\* opcodes +which take an explicit SVIEW[#] source register), there may be optionally +SVIEW[#] declarations. In this case, the SVIEW index is implied by the +SAMP index, and there must be a corresponding SVIEW[#] declaration for +each SAMP[#] declaration. Drivers are free to ignore this if they wish. +But note in particular that some drivers need to know the sampler type +(float/int/unsigned) in order to generate the correct code, so cases +where integer textures are sampled, SVIEW[#] declarations should be +used. + +NOTE: It is NOT legal to mix SAMPLE\* style opcodes and TEX\* opcodes +in the same shader. Declaration Resource ^^^^^^^^^^^^^^^^^^^^ From f481af110e6ab42b2d184f225bfe7eb1e66df393 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 10 Jun 2015 19:51:32 -0400 Subject: [PATCH 746/834] tgsi/transform: add support for SVIEW decls TODO single return_type (use enum) v2: single return_type arg, and use enum Signed-off-by: Rob Clark Reviewed-by: Roland Scheidegger Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/tgsi/tgsi_transform.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h index 921aa906527..39d7688ab3b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -143,6 +143,27 @@ tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx, ctx->emit_declaration(ctx, &decl); } +static INLINE void +tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx, + unsigned index, + unsigned target, + enum tgsi_return_type type) +{ + struct tgsi_full_declaration decl; + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER_VIEW; + decl.Declaration.UsageMask = 0xf; + decl.Range.First 
= + decl.Range.Last = index; + decl.SamplerView.Resource = target; + decl.SamplerView.ReturnTypeX = type; + decl.SamplerView.ReturnTypeY = type; + decl.SamplerView.ReturnTypeZ = type; + decl.SamplerView.ReturnTypeW = type; + + ctx->emit_declaration(ctx, &decl); +} static INLINE void tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx, From b516e68afb548894eff6b1f375c01f6dfafb6aed Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 10 Jun 2015 19:59:20 -0400 Subject: [PATCH 747/834] draw: updates to support SVIEW decls To allow for shaders which use SVIEW decls for TEX* instructions, we need to preserve the constraint that the shader either has no SVIEW's or it has one matching SVIEW for each SAMP. Signed-off-by: Rob Clark Reviewed-by: Roland Scheidegger Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 17 ++++++++++++++++- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 17 ++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 2f14efea96f..936046ea5f5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -51,7 +51,7 @@ /** Approx number of new tokens for instructions in aa_transform_inst() */ -#define NUM_NEW_TOKENS 50 +#define NUM_NEW_TOKENS 53 /** @@ -137,6 +137,7 @@ struct aa_transform_context { uint tempsUsed; /**< bitmask */ int colorOutput; /**< which output is the primary color */ uint samplersUsed; /**< bitfield of samplers used */ + bool hasSview; int freeSampler; /** an available sampler for the pstipple */ int maxInput, maxGeneric; /**< max input index found */ int colorTemp, texTemp; /**< temp registers */ @@ -165,6 +166,9 @@ aa_transform_decl(struct tgsi_transform_context *ctx, aactx->samplersUsed |= 1 << i; } } + else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { + aactx->hasSview = true; + } else if 
(decl->Declaration.File == TGSI_FILE_INPUT) { if ((int) decl->Range.Last > aactx->maxInput) aactx->maxInput = decl->Range.Last; @@ -232,6 +236,17 @@ aa_transform_prolog(struct tgsi_transform_context *ctx) /* declare new sampler */ tgsi_transform_sampler_decl(ctx, aactx->freeSampler); + /* if the src shader has SVIEW decl's for each SAMP decl, we + * need to continue the trend and ensure there is a matching + * SVIEW for the new SAMP we just created + */ + if (aactx->hasSview) { + tgsi_transform_sampler_view_decl(ctx, + aactx->freeSampler, + TGSI_TEXTURE_2D, + TGSI_RETURN_TYPE_FLOAT); + } + /* declare new temp regs */ tgsi_transform_temp_decl(ctx, aactx->texTemp); tgsi_transform_temp_decl(ctx, aactx->colorTemp); diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 8f21c46a43a..445f195e59c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -53,7 +53,7 @@ /** Approx number of new tokens for instructions in pstip_transform_inst() */ -#define NUM_NEW_TOKENS 50 +#define NUM_NEW_TOKENS 53 /** @@ -126,6 +126,7 @@ struct pstip_transform_context { int wincoordInput; int maxInput; uint samplersUsed; /**< bitfield of samplers used */ + bool hasSview; int freeSampler; /** an available sampler for the pstipple */ int texTemp; /**< temp registers */ int numImmed; @@ -149,6 +150,9 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, pctx->samplersUsed |= 1 << i; } } + else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { + pctx->hasSview = true; + } else if (decl->Declaration.File == TGSI_FILE_INPUT) { pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last); if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) @@ -232,6 +236,17 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx) /* declare new sampler */ tgsi_transform_sampler_decl(ctx, pctx->freeSampler); + /* if the src shader has SVIEW decl's for each SAMP decl, we + * 
need to continue the trend and ensure there is a matching + * SVIEW for the new SAMP we just created + */ + if (pctx->hasSview) { + tgsi_transform_sampler_view_decl(ctx, + pctx->freeSampler, + TGSI_TEXTURE_2D, + TGSI_RETURN_TYPE_FLOAT); + } + /* declare new temp regs */ tgsi_transform_temp_decl(ctx, pctx->texTemp); From e53699298640df7d7659a8ce88b68e43918b600c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 10 Jun 2015 20:01:11 -0400 Subject: [PATCH 748/834] util/pstipple: updates for SVIEW decls To allow for shaders which use SVIEW decls for TEX* instructions, we need to preserve the constraint that the shader either has no SVIEW's or it has one matching SVIEW for each SAMP. Signed-off-by: Rob Clark Reviewed-by: Roland Scheidegger Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/util/u_pstipple.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c index 0a20bdb4747..1f65672221f 100644 --- a/src/gallium/auxiliary/util/u_pstipple.c +++ b/src/gallium/auxiliary/util/u_pstipple.c @@ -55,7 +55,7 @@ #include "tgsi/tgsi_scan.h" /** Approx number of new tokens for instructions in pstip_transform_inst() */ -#define NUM_NEW_TOKENS 50 +#define NUM_NEW_TOKENS 53 static void @@ -262,6 +262,7 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx) (struct pstip_transform_context *) ctx; int wincoordInput; int texTemp; + int sampIdx; /* find free texture sampler */ pctx->freeSampler = free_bit(pctx->samplersUsed); @@ -280,9 +281,21 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx) TGSI_INTERPOLATE_LINEAR); } + sampIdx = pctx->hasFixedUnit ? pctx->fixedUnit : pctx->freeSampler; + /* declare new sampler */ - tgsi_transform_sampler_decl(ctx, - pctx->hasFixedUnit ? 
pctx->fixedUnit : pctx->freeSampler); + tgsi_transform_sampler_decl(ctx, sampIdx); + + /* if the src shader has SVIEW decl's for each SAMP decl, we + * need to continue the trend and ensure there is a matching + * SVIEW for the new SAMP we just created + */ + if (pctx->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { + tgsi_transform_sampler_view_decl(ctx, + sampIdx, + TGSI_TEXTURE_2D, + TGSI_RETURN_TYPE_FLOAT); + } /* Declare temp[0] reg if not already declared. * We can always use temp[0] since this code is before @@ -321,8 +334,7 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx) tgsi_transform_tex_2d_inst(ctx, TGSI_FILE_TEMPORARY, texTemp, TGSI_FILE_TEMPORARY, texTemp, - pctx->hasFixedUnit ? pctx->fixedUnit - : pctx->freeSampler); + sampIdx); /* KILL_IF -texTemp; # if -texTemp < 0, kill fragment */ tgsi_transform_kill_inst(ctx, From 93379748f7e4f5ab22040cdb7a4cccdcfb7954c1 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 10 Jun 2015 20:02:55 -0400 Subject: [PATCH 749/834] util/blitter (and friends): generate appropriate SVIEW decls Some hardware needs to know the sampler type. Update the blit related shaders to include SVIEW decl. 
Signed-off-by: Rob Clark Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/util/u_blit.c | 41 +++++++--- src/gallium/auxiliary/util/u_blitter.c | 57 +++++++++++--- src/gallium/auxiliary/util/u_simple_shaders.c | 78 ++++++++++++++----- src/gallium/auxiliary/util/u_simple_shaders.h | 16 ++-- src/gallium/auxiliary/util/u_tests.c | 3 +- src/gallium/tests/trivial/quad-tex.c | 4 +- 6 files changed, 149 insertions(+), 50 deletions(-) diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 3f3b5fe63e4..e3f30557a03 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -65,7 +65,7 @@ struct blit_state struct pipe_vertex_element velem[2]; void *vs; - void *fs[PIPE_MAX_TEXTURE_TYPES][TGSI_WRITEMASK_XYZW + 1]; + void *fs[PIPE_MAX_TEXTURE_TYPES][TGSI_WRITEMASK_XYZW + 1][3]; struct pipe_resource *vbuf; /**< quad vertices */ unsigned vbuf_slot; @@ -135,15 +135,17 @@ void util_destroy_blit(struct blit_state *ctx) { struct pipe_context *pipe = ctx->pipe; - unsigned i, j; + unsigned i, j, k; if (ctx->vs) pipe->delete_vs_state(pipe, ctx->vs); for (i = 0; i < Elements(ctx->fs); i++) { for (j = 0; j < Elements(ctx->fs[i]); j++) { - if (ctx->fs[i][j]) - pipe->delete_fs_state(pipe, ctx->fs[i][j]); + for (k = 0; k < Elements(ctx->fs[i][j]); k++) { + if (ctx->fs[i][j][k]) + pipe->delete_fs_state(pipe, ctx->fs[i][j][k]); + } } } @@ -158,18 +160,34 @@ util_destroy_blit(struct blit_state *ctx) */ static INLINE void set_fragment_shader(struct blit_state *ctx, uint writemask, + enum pipe_format format, enum pipe_texture_target pipe_tex) { - if (!ctx->fs[pipe_tex][writemask]) { - unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(pipe_tex, 0); + enum tgsi_return_type stype; + unsigned idx; - ctx->fs[pipe_tex][writemask] = - util_make_fragment_tex_shader_writemask(ctx->pipe, tgsi_tex, - TGSI_INTERPOLATE_LINEAR, - writemask); + if (util_format_is_pure_uint(format)) { + stype = TGSI_RETURN_TYPE_UINT; + idx = 0; + } else if 
(util_format_is_pure_sint(format)) { + stype = TGSI_RETURN_TYPE_SINT; + idx = 1; + } else { + stype = TGSI_RETURN_TYPE_FLOAT; + idx = 2; } - cso_set_fragment_shader_handle(ctx->cso, ctx->fs[pipe_tex][writemask]); + if (!ctx->fs[pipe_tex][writemask][idx]) { + unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(pipe_tex, 0); + + ctx->fs[pipe_tex][writemask][idx] = + util_make_fragment_tex_shader_writemask(ctx->pipe, tgsi_tex, + TGSI_INTERPOLATE_LINEAR, + writemask, + stype); + } + + cso_set_fragment_shader_handle(ctx->cso, ctx->fs[pipe_tex][writemask][idx]); } @@ -571,6 +589,7 @@ util_blit_pixels_tex(struct blit_state *ctx, /* shaders */ set_fragment_shader(ctx, TGSI_WRITEMASK_XYZW, + src_sampler_view->format, src_sampler_view->texture->target); set_vertex_shader(ctx); cso_set_tessctrl_shader_handle(ctx->cso, NULL); diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 16bf90fc9d6..b5ef9a23966 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -81,6 +81,8 @@ struct blitter_context_priv /* FS which outputs a color from a texture, where the index is PIPE_TEXTURE_* to be sampled. */ void *fs_texfetch_col[PIPE_MAX_TEXTURE_TYPES]; + void *fs_texfetch_col_uint[PIPE_MAX_TEXTURE_TYPES]; + void *fs_texfetch_col_sint[PIPE_MAX_TEXTURE_TYPES]; /* FS which outputs a depth from a texture, where the index is PIPE_TEXTURE_* to be sampled. */ @@ -90,6 +92,8 @@ struct blitter_context_priv /* FS which outputs one sample from a multisample texture. 
*/ void *fs_texfetch_col_msaa[PIPE_MAX_TEXTURE_TYPES]; + void *fs_texfetch_col_msaa_uint[PIPE_MAX_TEXTURE_TYPES]; + void *fs_texfetch_col_msaa_sint[PIPE_MAX_TEXTURE_TYPES]; void *fs_texfetch_depth_msaa[PIPE_MAX_TEXTURE_TYPES]; void *fs_texfetch_depthstencil_msaa[PIPE_MAX_TEXTURE_TYPES]; void *fs_texfetch_stencil_msaa[PIPE_MAX_TEXTURE_TYPES]; @@ -438,6 +442,10 @@ void util_blitter_destroy(struct blitter_context *blitter) for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) { if (ctx->fs_texfetch_col[i]) ctx->delete_fs_state(pipe, ctx->fs_texfetch_col[i]); + if (ctx->fs_texfetch_col_sint[i]) + ctx->delete_fs_state(pipe, ctx->fs_texfetch_col_sint[i]); + if (ctx->fs_texfetch_col_uint[i]) + ctx->delete_fs_state(pipe, ctx->fs_texfetch_col_uint[i]); if (ctx->fs_texfetch_depth[i]) ctx->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]); if (ctx->fs_texfetch_depthstencil[i]) @@ -447,6 +455,10 @@ void util_blitter_destroy(struct blitter_context *blitter) if (ctx->fs_texfetch_col_msaa[i]) ctx->delete_fs_state(pipe, ctx->fs_texfetch_col_msaa[i]); + if (ctx->fs_texfetch_col_msaa_sint[i]) + ctx->delete_fs_state(pipe, ctx->fs_texfetch_col_msaa_sint[i]); + if (ctx->fs_texfetch_col_msaa_uint[i]) + ctx->delete_fs_state(pipe, ctx->fs_texfetch_col_msaa_uint[i]); if (ctx->fs_texfetch_depth_msaa[i]) ctx->delete_fs_state(pipe, ctx->fs_texfetch_depth_msaa[i]); if (ctx->fs_texfetch_depthstencil_msaa[i]) @@ -844,25 +856,29 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, { struct pipe_context *pipe = ctx->base.pipe; unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target, src_nr_samples); + enum tgsi_return_type stype; assert(target < PIPE_MAX_TEXTURE_TYPES); + if (util_format_is_pure_uint(format)) + stype = TGSI_RETURN_TYPE_UINT; + else if (util_format_is_pure_sint(format)) + stype = TGSI_RETURN_TYPE_SINT; + else + stype = TGSI_RETURN_TYPE_FLOAT; + if (src_nr_samples > 1) { void **shader; if (dst_nr_samples <= 1) { /* The destination has one sample, so we'll do color 
resolve. */ - boolean is_uint, is_sint; unsigned index = GET_MSAA_RESOLVE_FS_IDX(src_nr_samples); - is_uint = util_format_is_pure_uint(format); - is_sint = util_format_is_pure_sint(format); - assert(filter < 2); - if (is_uint) + if (stype == TGSI_RETURN_TYPE_UINT) shader = &ctx->fs_resolve_uint[target][index][filter]; - else if (is_sint) + else if (stype == TGSI_RETURN_TYPE_SINT) shader = &ctx->fs_resolve_sint[target][index][filter]; else shader = &ctx->fs_resolve[target][index][filter]; @@ -872,12 +888,12 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, if (filter == PIPE_TEX_FILTER_LINEAR) { *shader = util_make_fs_msaa_resolve_bilinear(pipe, tgsi_tex, src_nr_samples, - is_uint, is_sint); + stype); } else { *shader = util_make_fs_msaa_resolve(pipe, tgsi_tex, src_nr_samples, - is_uint, is_sint); + stype); } } } @@ -885,24 +901,37 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, /* The destination has multiple samples, we'll do * an MSAA->MSAA copy. */ - shader = &ctx->fs_texfetch_col_msaa[target]; + if (stype == TGSI_RETURN_TYPE_UINT) + shader = &ctx->fs_texfetch_col_msaa_uint[target]; + else if (stype == TGSI_RETURN_TYPE_SINT) + shader = &ctx->fs_texfetch_col_msaa_sint[target]; + else + shader = &ctx->fs_texfetch_col_msaa[target]; /* Create the fragment shader on-demand. */ if (!*shader) { assert(!ctx->cached_all_shaders); - *shader = util_make_fs_blit_msaa_color(pipe, tgsi_tex); + *shader = util_make_fs_blit_msaa_color(pipe, tgsi_tex, stype); } } return *shader; } else { - void **shader = &ctx->fs_texfetch_col[target]; + void **shader; + + if (stype == TGSI_RETURN_TYPE_UINT) + shader = &ctx->fs_texfetch_col_uint[target]; + else if (stype == TGSI_RETURN_TYPE_SINT) + shader = &ctx->fs_texfetch_col_sint[target]; + else + shader = &ctx->fs_texfetch_col[target]; /* Create the fragment shader on-demand. 
*/ if (!*shader) { assert(!ctx->cached_all_shaders); *shader = util_make_fragment_tex_shader(pipe, tgsi_tex, - TGSI_INTERPOLATE_LINEAR); + TGSI_INTERPOLATE_LINEAR, + stype); } return *shader; @@ -1066,6 +1095,10 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter) */ blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_FLOAT, target, samples, samples, 0); + blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_UINT, target, + samples, samples, 0); + blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_SINT, target, + samples, samples, 0); blitter_get_fs_texfetch_depth(ctx, target, samples); if (ctx->has_stencil_export) { blitter_get_fs_texfetch_depthstencil(ctx, target, samples); diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index c612b67e284..6d29cab9207 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -216,7 +216,8 @@ void * util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, unsigned tex_target, unsigned interp_mode, - unsigned writemask ) + unsigned writemask, + enum tgsi_return_type stype) { struct ureg_program *ureg; struct ureg_src sampler; @@ -232,6 +233,8 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, sampler = ureg_DECL_sampler( ureg, 0 ); + ureg_DECL_sampler_view(ureg, 0, tex_target, stype, stype, stype, stype); + tex = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_GENERIC, 0, interp_mode ); @@ -268,12 +271,14 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, */ void * util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target, - unsigned interp_mode) + unsigned interp_mode, + enum tgsi_return_type stype) { return util_make_fragment_tex_shader_writemask( pipe, tex_target, interp_mode, - TGSI_WRITEMASK_XYZW ); + TGSI_WRITEMASK_XYZW, + stype ); } @@ -298,6 +303,12 @@ util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe, sampler = 
ureg_DECL_sampler( ureg, 0 ); + ureg_DECL_sampler_view(ureg, 0, tex_target, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT); + tex = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_GENERIC, 0, interp_mode ); @@ -343,7 +354,17 @@ util_make_fragment_tex_shader_writedepthstencil(struct pipe_context *pipe, return NULL; depth_sampler = ureg_DECL_sampler( ureg, 0 ); + ureg_DECL_sampler_view(ureg, 0, tex_target, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT); stencil_sampler = ureg_DECL_sampler( ureg, 1 ); + ureg_DECL_sampler_view(ureg, 0, tex_target, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT); tex = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_GENERIC, 0, @@ -398,6 +419,12 @@ util_make_fragment_tex_shader_writestencil(struct pipe_context *pipe, stencil_sampler = ureg_DECL_sampler( ureg, 0 ); + ureg_DECL_sampler_view(ureg, 0, tex_target, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT); + tex = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_GENERIC, 0, interp_mode ); @@ -512,6 +539,7 @@ util_make_fragment_cloneinput_shader(struct pipe_context *pipe, int num_cbufs, static void * util_make_fs_blit_msaa_gen(struct pipe_context *pipe, unsigned tgsi_tex, + const char *samp_type, const char *output_semantic, const char *output_mask) { @@ -519,6 +547,7 @@ util_make_fs_blit_msaa_gen(struct pipe_context *pipe, "FRAG\n" "DCL IN[0], GENERIC[0], LINEAR\n" "DCL SAMP[0]\n" + "DCL SVIEW[0], %s, %s\n" "DCL OUT[0], %s\n" "DCL TEMP[0]\n" @@ -534,7 +563,8 @@ util_make_fs_blit_msaa_gen(struct pipe_context *pipe, assert(tgsi_tex == TGSI_TEXTURE_2D_MSAA || tgsi_tex == TGSI_TEXTURE_2D_ARRAY_MSAA); - sprintf(text, shader_templ, output_semantic, output_mask, type); + sprintf(text, shader_templ, type, samp_type, + output_semantic, output_mask, type); if (!tgsi_text_translate(text, tokens, 
Elements(tokens))) { puts(text); @@ -556,9 +586,19 @@ util_make_fs_blit_msaa_gen(struct pipe_context *pipe, */ void * util_make_fs_blit_msaa_color(struct pipe_context *pipe, - unsigned tgsi_tex) + unsigned tgsi_tex, + enum tgsi_return_type stype) { - return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, + const char *samp_type; + + if (stype == TGSI_RETURN_TYPE_UINT) + samp_type = "UINT"; + else if (stype == TGSI_RETURN_TYPE_SINT) + samp_type = "SINT"; + else + samp_type = "FLOAT"; + + return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, samp_type, "COLOR[0]", ""); } @@ -572,7 +612,7 @@ void * util_make_fs_blit_msaa_depth(struct pipe_context *pipe, unsigned tgsi_tex) { - return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, + return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, "FLOAT", "POSITION", ".z"); } @@ -586,7 +626,7 @@ void * util_make_fs_blit_msaa_stencil(struct pipe_context *pipe, unsigned tgsi_tex) { - return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, + return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, "UINT", "STENCIL", ".y"); } @@ -640,7 +680,7 @@ util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe, void * util_make_fs_msaa_resolve(struct pipe_context *pipe, unsigned tgsi_tex, unsigned nr_samples, - boolean is_uint, boolean is_sint) + enum tgsi_return_type stype) { struct ureg_program *ureg; struct ureg_src sampler, coord; @@ -653,6 +693,7 @@ util_make_fs_msaa_resolve(struct pipe_context *pipe, /* Declarations. 
*/ sampler = ureg_DECL_sampler(ureg, 0); + ureg_DECL_sampler_view(ureg, 0, tgsi_tex, stype, stype, stype, stype); coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_LINEAR); out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); @@ -670,9 +711,9 @@ util_make_fs_msaa_resolve(struct pipe_context *pipe, ureg_imm1u(ureg, i)); ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord), sampler); - if (is_uint) + if (stype == TGSI_RETURN_TYPE_UINT) ureg_U2F(ureg, tmp, ureg_src(tmp)); - else if (is_sint) + else if (stype == TGSI_RETURN_TYPE_SINT) ureg_I2F(ureg, tmp, ureg_src(tmp)); /* Add it to the sum.*/ @@ -683,9 +724,9 @@ util_make_fs_msaa_resolve(struct pipe_context *pipe, ureg_MUL(ureg, tmp_sum, ureg_src(tmp_sum), ureg_imm1f(ureg, 1.0 / nr_samples)); - if (is_uint) + if (stype == TGSI_RETURN_TYPE_UINT) ureg_F2U(ureg, out, ureg_src(tmp_sum)); - else if (is_sint) + else if (stype == TGSI_RETURN_TYPE_SINT) ureg_F2I(ureg, out, ureg_src(tmp_sum)); else ureg_MOV(ureg, out, ureg_src(tmp_sum)); @@ -699,7 +740,7 @@ util_make_fs_msaa_resolve(struct pipe_context *pipe, void * util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe, unsigned tgsi_tex, unsigned nr_samples, - boolean is_uint, boolean is_sint) + enum tgsi_return_type stype) { struct ureg_program *ureg; struct ureg_src sampler, coord; @@ -713,6 +754,7 @@ util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe, /* Declarations. 
*/ sampler = ureg_DECL_sampler(ureg, 0); + ureg_DECL_sampler_view(ureg, 0, tgsi_tex, stype, stype, stype, stype); coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_LINEAR); out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); @@ -744,9 +786,9 @@ util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe, ureg_imm1u(ureg, i)); ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord[c]), sampler); - if (is_uint) + if (stype == TGSI_RETURN_TYPE_UINT) ureg_U2F(ureg, tmp, ureg_src(tmp)); - else if (is_sint) + else if (stype == TGSI_RETURN_TYPE_SINT) ureg_I2F(ureg, tmp, ureg_src(tmp)); /* Add it to the sum.*/ @@ -778,9 +820,9 @@ util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe, ureg_src(top)); /* Convert to the texture format and return. */ - if (is_uint) + if (stype == TGSI_RETURN_TYPE_UINT) ureg_F2U(ureg, out, ureg_src(tmp)); - else if (is_sint) + else if (stype == TGSI_RETURN_TYPE_SINT) ureg_F2I(ureg, out, ureg_src(tmp)); else ureg_MOV(ureg, out, ureg_src(tmp)); diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h index dd282e02a13..08d798ef541 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -68,15 +68,16 @@ extern void * util_make_layered_clear_geometry_shader(struct pipe_context *pipe); extern void * -util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, +util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, unsigned tex_target, unsigned interp_mode, - unsigned writemask); + unsigned writemask, + enum tgsi_return_type stype); extern void * util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target, - unsigned interp_mode); - + unsigned interp_mode, + enum tgsi_return_type stype); extern void * util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe, @@ -115,7 +116,8 @@ util_make_fragment_cloneinput_shader(struct pipe_context *pipe, int num_cbufs, extern void * 
util_make_fs_blit_msaa_color(struct pipe_context *pipe, - unsigned tgsi_tex); + unsigned tgsi_tex, + enum tgsi_return_type stype); extern void * @@ -136,13 +138,13 @@ util_make_fs_blit_msaa_stencil(struct pipe_context *pipe, void * util_make_fs_msaa_resolve(struct pipe_context *pipe, unsigned tgsi_tex, unsigned nr_samples, - boolean is_uint, boolean is_sint); + enum tgsi_return_type stype); void * util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe, unsigned tgsi_tex, unsigned nr_samples, - boolean is_uint, boolean is_sint); + enum tgsi_return_type stype); #ifdef __cplusplus } diff --git a/src/gallium/auxiliary/util/u_tests.c b/src/gallium/auxiliary/util/u_tests.c index fe549723c33..6a489d63c09 100644 --- a/src/gallium/auxiliary/util/u_tests.c +++ b/src/gallium/auxiliary/util/u_tests.c @@ -373,7 +373,8 @@ null_sampler_view(struct pipe_context *ctx, unsigned tgsi_tex_target) /* Fragment shader. */ fs = util_make_fragment_tex_shader(ctx, tgsi_tex_target, - TGSI_INTERPOLATE_LINEAR); + TGSI_INTERPOLATE_LINEAR, + TGSI_RETURN_TYPE_FLOAT); cso_set_fragment_shader_handle(cso, fs); /* Vertex shader. 
*/ diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c index abecedbd594..daae577ec4b 100644 --- a/src/gallium/tests/trivial/quad-tex.c +++ b/src/gallium/tests/trivial/quad-tex.c @@ -270,7 +270,9 @@ static void init_prog(struct program *p) } /* fragment shader */ - p->fs = util_make_fragment_tex_shader(p->pipe, TGSI_TEXTURE_2D, TGSI_INTERPOLATE_LINEAR); + p->fs = util_make_fragment_tex_shader(p->pipe, TGSI_TEXTURE_2D, + TGSI_INTERPOLATE_LINEAR, + TGSI_RETURN_TYPE_FLOAT); } static void close_prog(struct program *p) From cb258c1dec1ff348d508a6b02fbc9aa11eb9f829 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 8 Jun 2015 13:20:30 -0400 Subject: [PATCH 750/834] glsl_to_tgsi: add SVIEW decl support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Freedreno needs sampler type information to deal with int/uint textures. To accomplish this, start creating sampler-view declarations, as suggested here: http://lists.freedesktop.org/archives/mesa-dev/2014-November/071583.html create a sampler-view with index matching the sampler, to encode the texture type (ie. SINT/UINT/FLOAT). Ie: DCL SVIEW[n], 2D, UINT DCL SAMP[n] TEX OUT[1], IN[1], SAMP[n] For tgsi texture instructions which do not take an explicit SVIEW argument, the SVIEW index is implied by the SAMP index. 
Signed-off-by: Rob Clark Reviewed-by: Roland Scheidegger Reviewed-by: Marek Olšák Reviewed-by: Jose Fonseca --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 36 +++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 03834b69a23..25e30c7deb2 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -239,6 +239,7 @@ public: st_src_reg sampler; /**< sampler register */ int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */ int tex_target; /**< One of TEXTURE_*_INDEX */ + glsl_base_type tex_type; GLboolean tex_shadow; st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; @@ -345,6 +346,8 @@ public: int num_address_regs; int samplers_used; + glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; + int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ bool indirect_addr_consts; int wpos_transform_const; @@ -579,6 +582,10 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, inst->src[3] = src3; inst->ir = ir; inst->dead_mask = 0; + /* default to float, for paths where this is not initialized + * (since 0==UINT which is likely wrong): + */ + inst->tex_type = GLSL_TYPE_FLOAT; inst->function = NULL; @@ -3324,6 +3331,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) assert(!"Should not get here."); } + inst->tex_type = ir->type->base_type; + this->result = result_src; } @@ -3476,7 +3485,13 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { if (is_tex_instruction(inst->op)) { for (int i = 0; i < inst->sampler_array_size; i++) { - v->samplers_used |= 1 << (inst->sampler.index + i); + unsigned idx = inst->sampler.index + i; + v->samplers_used |= 1 << idx; + + debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types)); + v->sampler_types[idx] = inst->tex_type; + v->sampler_targets[idx] = + 
st_translate_texture_target(inst->tex_target, inst->tex_shadow); if (inst->tex_shadow) { prog->ShadowSamplers |= 1 << (inst->sampler.index + i); @@ -5536,7 +5551,26 @@ st_translate_program( /* texture samplers */ for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) { if (program->samplers_used & (1 << i)) { + unsigned type; + t->samplers[i] = ureg_DECL_sampler(ureg, i); + + switch (program->sampler_types[i]) { + case GLSL_TYPE_INT: + type = TGSI_RETURN_TYPE_SINT; + break; + case GLSL_TYPE_UINT: + type = TGSI_RETURN_TYPE_UINT; + break; + case GLSL_TYPE_FLOAT: + type = TGSI_RETURN_TYPE_FLOAT; + break; + default: + unreachable("not reached"); + } + + ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i], + type, type, type, type ); } } From b3d2e367167b675c0b402c90220f40f8cd567d7c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 8 Jun 2015 14:09:09 -0400 Subject: [PATCH 751/834] gallium/ttn: add texture-type support v2: rebased on using SVIEW to hold type information Signed-off-by: Rob Clark Reviewed-by: Eric Anholt Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 44 ++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 1702b41393b..985f766b201 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -58,6 +58,9 @@ struct ttn_compile { struct ttn_reg_info *temp_regs; nir_ssa_def **imm_defs; + unsigned num_samp_types; + nir_alu_type *samp_types; + nir_register *addr_reg; /** @@ -156,6 +159,30 @@ ttn_emit_declaration(struct ttn_compile *c) /* Nothing to record for system values. */ } else if (file == TGSI_FILE_SAMPLER) { /* Nothing to record for samplers. 
*/ + } else if (file == TGSI_FILE_SAMPLER_VIEW) { + struct tgsi_declaration_sampler_view *sview = &decl->SamplerView; + nir_alu_type type; + + assert((sview->ReturnTypeX == sview->ReturnTypeY) && + (sview->ReturnTypeX == sview->ReturnTypeZ) && + (sview->ReturnTypeX == sview->ReturnTypeW)); + + switch (sview->ReturnTypeX) { + case TGSI_RETURN_TYPE_SINT: + type = nir_type_int; + break; + case TGSI_RETURN_TYPE_UINT: + type = nir_type_unsigned; + break; + case TGSI_RETURN_TYPE_FLOAT: + default: + type = nir_type_float; + break; + } + + for (i = 0; i < array_size; i++) { + c->samp_types[decl->Range.First + i] = type; + } } else { nir_variable *var; assert(file == TGSI_FILE_INPUT || @@ -1026,7 +1053,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction; nir_tex_instr *instr; nir_texop op; - unsigned num_srcs, samp = 1, i; + unsigned num_srcs, samp = 1, sview, i; switch (tgsi_inst->Instruction.Opcode) { case TGSI_OPCODE_TEX: @@ -1105,6 +1132,18 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER); instr->sampler_index = tgsi_inst->Src[samp].Register.Index; + /* TODO if we supported any opc's which take an explicit SVIEW + * src, we would use that here instead. 
But for the "legacy" + * texture opc's the SVIEW index is same as SAMP index: + */ + sview = instr->sampler_index; + + if (sview < c->num_samp_types) { + instr->dest_type = c->samp_types[sview]; + } else { + instr->dest_type = nir_type_float; + } + unsigned src_number = 0; instr->src[src_number].src = @@ -1710,6 +1749,9 @@ tgsi_to_nir(const void *tgsi_tokens, c->imm_defs = rzalloc_array(c, nir_ssa_def *, scan.file_max[TGSI_FILE_IMMEDIATE] + 1); + c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; + c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types); + c->if_stack = rzalloc_array(c, struct exec_list *, (scan.opcode_count[TGSI_OPCODE_IF] + scan.opcode_count[TGSI_OPCODE_UIF]) * 2); From c79b2e626c60a29f684bc389f07a712b59fa99cc Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 29 Apr 2015 08:38:45 -0400 Subject: [PATCH 752/834] util/list: add list_first/last_entry I need an easier way to get at head/tail in ir3. Signed-off-by: Rob Clark --- src/util/list.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/util/list.h b/src/util/list.h index 946034710ef..b98ce59ff77 100644 --- a/src/util/list.h +++ b/src/util/list.h @@ -140,6 +140,13 @@ static inline void list_validate(struct list_head *list) - ((char *)&(sample)->member - (char *)(sample))) #endif +#define list_first_entry(ptr, type, member) \ + LIST_ENTRY(type, (ptr)->next, member) + +#define list_last_entry(ptr, type, member) \ + LIST_ENTRY(type, (ptr)->prev, member) + + #define LIST_FOR_EACH_ENTRY(pos, head, member) \ for (pos = NULL, pos = container_of((head)->next, pos, member); \ &pos->member != (head); \ From b33015f8895a37fcae1da2984796cb1ef30f8b13 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 30 Apr 2015 15:20:03 -0400 Subject: [PATCH 753/834] gallium/ttn: add missing SNE Signed-off-by: Rob Clark Reviewed-by: Kenneth Graunke --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 985f766b201..061f39ac6f3 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1324,6 +1324,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_SEQ] = nir_op_seq, [TGSI_OPCODE_SGT] = 0, [TGSI_OPCODE_SIN] = nir_op_fsin, + [TGSI_OPCODE_SNE] = nir_op_sne, [TGSI_OPCODE_SLE] = 0, [TGSI_OPCODE_TEX] = 0, [TGSI_OPCODE_TXD] = 0, From 5c8c2e2f97394436effbdd3e0f61eec4590accb2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 25 Apr 2015 11:05:27 -0400 Subject: [PATCH 754/834] freedreno/ir3: more builder helpers Use ir3_MOV() builder in a couple of spots, rather than open-coding the instruction construction. Also add ir3_NOP() builder and use that instead of open coding. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.h | 6 +++++ src/gallium/drivers/freedreno/ir3/ir3_group.c | 23 +++++-------------- .../drivers/freedreno/ir3/ir3_legalize.c | 4 ++-- src/gallium/drivers/freedreno/ir3/ir3_sched.c | 4 ++-- 4 files changed, 16 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index c0a14a07d48..a7fd1814ff5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -807,6 +807,12 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src, return instr; } +static inline struct ir3_instruction * +ir3_NOP(struct ir3_block *block) +{ + return ir3_instr_create(block, 0, OPC_NOP); +} + #define INSTR1(CAT, name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block, \ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c index 782f6e87e56..8eed083866d 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_group.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c @@ -50,19 +50,6 @@ static bool check_stop(struct ir3_instruction *instr) return false; } -static 
struct ir3_instruction * create_mov(struct ir3_instruction *instr) -{ - struct ir3_instruction *mov; - - mov = ir3_instr_create(instr->block, 1, 0); - mov->cat1.src_type = TYPE_F32; - mov->cat1.dst_type = TYPE_F32; - ir3_reg_create(mov, 0, 0); /* dst */ - ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = instr; - - return mov; -} - /* bleh.. we need to do the same group_n() thing for both inputs/outputs * (where we have a simple instr[] array), and fanin nodes (where we have * an extra indirection via reg->instr). @@ -78,7 +65,8 @@ static struct ir3_instruction *arr_get(void *arr, int idx) } static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr) { - ((struct ir3_instruction **)arr)[idx] = create_mov(instr); + ((struct ir3_instruction **)arr)[idx] = + ir3_MOV(instr->block, instr, TYPE_F32); } static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr) { @@ -113,7 +101,8 @@ static struct ir3_instruction *instr_get(void *arr, int idx) } static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) { - ((struct ir3_instruction *)arr)->regs[idx+1]->instr = create_mov(instr); + ((struct ir3_instruction *)arr)->regs[idx+1]->instr = + ir3_MOV(instr->block, instr, TYPE_F32); } static struct group_ops instr_ops = { instr_get, instr_insert_mov }; @@ -210,8 +199,8 @@ static void pad_and_group_input(struct ir3_instruction **input, unsigned n) if (instr) { block = instr->block; } else if (block) { - instr = ir3_instr_create(block, 0, OPC_NOP); - ir3_reg_create(instr, 0, IR3_REG_SSA); /* dst */ + instr = ir3_NOP(block); + ir3_reg_create(instr, 0, IR3_REG_SSA); /* dummy dst */ input[i] = instr; mask |= (1 << i); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index 2455f7e4efc..61713c25e72 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -134,14 +134,14 @@ static void legalize(struct 
ir3_legalize_ctx *ctx) */ if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) { struct ir3_instruction *nop; - nop = ir3_instr_create(block, 0, OPC_NOP); + nop = ir3_NOP(block); nop->flags |= IR3_INSTR_SS; n->flags &= ~IR3_INSTR_SS; } /* need to be able to set (ss) on first instruction: */ if ((shader->instrs_count == 0) && (n->category >= 5)) - ir3_instr_create(block, 0, OPC_NOP); + ir3_NOP(block); if (is_nop(n) && shader->instrs_count) { struct ir3_instruction *last = diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index a790cba129b..5ca6d7b62d5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -125,7 +125,7 @@ static void schedule(struct ir3_sched_ctx *ctx, * scheduling and depth calculation.. */ if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr)) - schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false); + schedule(ctx, ir3_NOP(block), false); /* remove from depth list: */ @@ -453,7 +453,7 @@ static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block) * then it is time for nop's: */ while (cnt > ctx->cnt) - schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false); + schedule(ctx, ir3_NOP(block), false); } /* at this point, scheduled list is in reverse order, so fix that: */ From 67d994c6761e09205dbc9a0515c510fc9dde02c7 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 30 Apr 2015 10:10:14 -0400 Subject: [PATCH 755/834] freedreno/ir3: drop dot graph dumping At least for now.. right now the instruction and instruction list printing should suffice, and the re-working of ir3_block would require a lot of changes in that code. 
Signed-off-by: Rob Clark --- .../drivers/freedreno/Makefile.sources | 2 +- .../drivers/freedreno/freedreno_screen.c | 1 - .../drivers/freedreno/freedreno_util.h | 1 - src/gallium/drivers/freedreno/ir3/ir3.h | 10 +- .../drivers/freedreno/ir3/ir3_cmdline.c | 3 +- .../drivers/freedreno/ir3/ir3_compiler.c | 34 +- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 32 +- src/gallium/drivers/freedreno/ir3/ir3_dump.c | 456 ------------------ src/gallium/drivers/freedreno/ir3/ir3_print.c | 210 ++++++++ src/gallium/drivers/freedreno/ir3/ir3_ra.c | 4 +- 10 files changed, 228 insertions(+), 525 deletions(-) delete mode 100644 src/gallium/drivers/freedreno/ir3/ir3_dump.c create mode 100644 src/gallium/drivers/freedreno/ir3/ir3_print.c diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index a565a9c4e4d..809d1a0f8f5 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -125,13 +125,13 @@ ir3_SOURCES := \ ir3/ir3_compiler.h \ ir3/ir3_cp.c \ ir3/ir3_depth.c \ - ir3/ir3_dump.c \ ir3/ir3_flatten.c \ ir3/ir3_group.c \ ir3/ir3.h \ ir3/ir3_legalize.c \ ir3/ir3_nir.h \ ir3/ir3_nir_lower_if_else.c \ + ir3/ir3_print.c \ ir3/ir3_ra.c \ ir3/ir3_sched.c \ ir3/ir3_shader.c \ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 6a5748c73ca..00b9471095e 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -68,7 +68,6 @@ static const struct debug_named_value debug_options[] = { {"fraghalf", FD_DBG_FRAGHALF, "Use half-precision in fragment shader"}, {"nobin", FD_DBG_NOBIN, "Disable hw binning"}, {"optmsgs", FD_DBG_OPTMSGS,"Enable optimizer debug messages"}, - {"optdump", FD_DBG_OPTDUMP,"Dump shader DAG to .dot files"}, {"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 120 (rather than 130) on a3xx+"}, {"nocp", FD_DBG_NOCP, "Disable 
copy-propagation"}, {"nir", FD_DBG_NIR, "Enable experimental NIR compiler"}, diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 2735ae41315..aec09ab6616 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -63,7 +63,6 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define FD_DBG_FRAGHALF 0x0080 #define FD_DBG_NOBIN 0x0100 #define FD_DBG_OPTMSGS 0x0400 -#define FD_DBG_OPTDUMP 0x0800 #define FD_DBG_GLSL120 0x1000 #define FD_DBG_NOCP 0x2000 #define FD_DBG_NIR 0x4000 diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index a7fd1814ff5..f37dfab3341 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -747,12 +747,8 @@ static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr /* dump: */ -#include -void ir3_dump(struct ir3 *shader, const char *name, - struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? 
*/, - FILE *f); -void ir3_dump_instr_single(struct ir3_instruction *instr); -void ir3_dump_instr_list(struct ir3_instruction *instr); +void ir3_print(struct ir3 *ir); +void ir3_print_instr(struct ir3_instruction *instr); /* flatten if/else: */ int ir3_block_flatten(struct ir3_block *block); @@ -765,7 +761,7 @@ void ir3_block_depth(struct ir3_block *block); /* copy-propagate: */ void ir3_block_cp(struct ir3_block *block); -/* group neightbors and insert mov's to resolve conflicts: */ +/* group neighbors and insert mov's to resolve conflicts: */ void ir3_block_group(struct ir3_block *block); /* scheduling: */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 0b16cc1eb54..e42afeaeb21 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "tgsi/tgsi_parse.h" @@ -243,7 +244,7 @@ int main(int argc, char **argv) while (n < argc) { if (!strcmp(argv[n], "--verbose")) { - fd_mesa_debug |= FD_DBG_OPTDUMP | FD_DBG_MSGS | FD_DBG_OPTMSGS; + fd_mesa_debug |= FD_DBG_MSGS | FD_DBG_OPTMSGS; n++; continue; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index cc049d3fdfd..25af9f91d3f 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -3515,22 +3515,6 @@ compile_instructions(struct ir3_compile_context *ctx) } } -static void -compile_dump(struct ir3_compile_context *ctx) -{ - const char *name = (ctx->so->type == SHADER_VERTEX) ? 
"vert" : "frag"; - static unsigned n = 0; - char fname[16]; - FILE *f; - snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++); - f = fopen(fname, "w"); - if (!f) - return; - ir3_block_depth(ctx->block); - ir3_dump(ctx->ir, name, ctx->block, f); - fclose(f); -} - int ir3_compile_shader(struct ir3_shader_variant *so, const struct tgsi_token *tokens, struct ir3_shader_key key, @@ -3613,20 +3597,15 @@ ir3_compile_shader(struct ir3_shader_variant *so, block->outputs[block->noutputs++] = ctx.kill[i]; } - if (fd_mesa_debug & FD_DBG_OPTDUMP) - compile_dump(&ctx); - ret = ir3_block_flatten(block); if (ret < 0) { DBG("FLATTEN failed!"); goto out; } - if ((ret > 0) && (fd_mesa_debug & FD_DBG_OPTDUMP)) - compile_dump(&ctx); if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("BEFORE CP:\n"); - ir3_dump_instr_list(block->head); + ir3_print(so->ir); } ir3_block_depth(block); @@ -3641,7 +3620,7 @@ ir3_compile_shader(struct ir3_shader_variant *so, if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("BEFORE GROUPING:\n"); - ir3_dump_instr_list(block->head); + ir3_print(so->ir); } /* Group left/right neighbors, inserting mov's where needed to @@ -3649,14 +3628,11 @@ ir3_compile_shader(struct ir3_shader_variant *so, */ ir3_block_group(block); - if (fd_mesa_debug & FD_DBG_OPTDUMP) - compile_dump(&ctx); - ir3_block_depth(block); if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER DEPTH:\n"); - ir3_dump_instr_list(block->head); + ir3_print(so->ir); } ret = ir3_block_sched(block); @@ -3667,7 +3643,7 @@ ir3_compile_shader(struct ir3_shader_variant *so, if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER SCHED:\n"); - ir3_dump_instr_list(block->head); + ir3_print(so->ir); } ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face); @@ -3678,7 +3654,7 @@ ir3_compile_shader(struct ir3_shader_variant *so, if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER RA:\n"); - ir3_dump_instr_list(block->head); + ir3_print(so->ir); } ir3_block_legalize(block, &so->has_samp, &max_bary); diff --git 
a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 2cf25ea6e0a..8d382e5cf3e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1908,22 +1908,6 @@ fixup_frag_inputs(struct ir3_compile *ctx) block->inputs = inputs; } -static void -compile_dump(struct ir3_compile *ctx) -{ - const char *name = (ctx->so->type == SHADER_VERTEX) ? "vert" : "frag"; - static unsigned n = 0; - char fname[16]; - FILE *f; - snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++); - f = fopen(fname, "w"); - if (!f) - return; - ir3_block_depth(ctx->block); - ir3_dump(ctx->ir, name, ctx->block, f); - fclose(f); -} - int ir3_compile_shader_nir(struct ir3_shader_variant *so, const struct tgsi_token *tokens, struct ir3_shader_key key) @@ -2008,12 +1992,9 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so, block->outputs[block->noutputs++] = ctx->kill[i]; } - if (fd_mesa_debug & FD_DBG_OPTDUMP) - compile_dump(ctx); - if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("BEFORE CP:\n"); - ir3_dump_instr_list(block->head); + ir3_print(so->ir); } ir3_block_depth(block); @@ -2022,7 +2003,7 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so, if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("BEFORE GROUPING:\n"); - ir3_dump_instr_list(block->head); + ir3_print(so->ir); } /* Group left/right neighbors, inserting mov's where needed to @@ -2030,14 +2011,11 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so, */ ir3_block_group(block); - if (fd_mesa_debug & FD_DBG_OPTDUMP) - compile_dump(ctx); - ir3_block_depth(block); if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER DEPTH:\n"); - ir3_dump_instr_list(block->head); + ir3_print(so->ir); } ret = ir3_block_sched(block); @@ -2048,7 +2026,7 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so, if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER SCHED:\n"); - ir3_dump_instr_list(block->head); + ir3_print(so->ir); } 
ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face); @@ -2059,7 +2037,7 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so, if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER RA:\n"); - ir3_dump_instr_list(block->head); + ir3_print(so->ir); } ir3_block_legalize(block, &so->has_samp, &max_bary); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_dump.c b/src/gallium/drivers/freedreno/ir3/ir3_dump.c deleted file mode 100644 index 1614d637b13..00000000000 --- a/src/gallium/drivers/freedreno/ir3/ir3_dump.c +++ /dev/null @@ -1,456 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include - -#include "ir3.h" - -#define PTRID(x) ((unsigned long)(x)) - -struct ir3_dump_ctx { - FILE *f; - bool verbose; -}; - -static void dump_instr_name(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr) -{ - /* for debugging: */ - if (ctx->verbose) { -#ifdef DEBUG - fprintf(ctx->f, "%04u:", instr->serialno); -#endif - fprintf(ctx->f, "%03u: ", instr->depth); - } - - if (instr->flags & IR3_INSTR_SY) - fprintf(ctx->f, "(sy)"); - if (instr->flags & IR3_INSTR_SS) - fprintf(ctx->f, "(ss)"); - - if (is_meta(instr)) { - switch(instr->opc) { - case OPC_META_PHI: - fprintf(ctx->f, "Φ"); - break; - default: - /* shouldn't hit here.. just for debugging: */ - switch (instr->opc) { - case OPC_META_INPUT: fprintf(ctx->f, "_meta:in"); break; - case OPC_META_OUTPUT: fprintf(ctx->f, "_meta:out"); break; - case OPC_META_FO: fprintf(ctx->f, "_meta:fo"); break; - case OPC_META_FI: fprintf(ctx->f, "_meta:fi"); break; - case OPC_META_FLOW: fprintf(ctx->f, "_meta:flow"); break; - - default: fprintf(ctx->f, "_meta:%d", instr->opc); break; - } - break; - } - } else if (instr->category == 1) { - static const char *type[] = { - [TYPE_F16] = "f16", - [TYPE_F32] = "f32", - [TYPE_U16] = "u16", - [TYPE_U32] = "u32", - [TYPE_S16] = "s16", - [TYPE_S32] = "s32", - [TYPE_U8] = "u8", - [TYPE_S8] = "s8", - }; - if (instr->cat1.src_type == instr->cat1.dst_type) - fprintf(ctx->f, "mov"); - else - fprintf(ctx->f, "cov"); - fprintf(ctx->f, ".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]); - } else { - fprintf(ctx->f, "%s", ir3_instr_name(instr)); - if (instr->flags & IR3_INSTR_3D) - fprintf(ctx->f, ".3d"); - if (instr->flags & IR3_INSTR_A) - fprintf(ctx->f, ".a"); - if (instr->flags & IR3_INSTR_O) - fprintf(ctx->f, ".o"); - if (instr->flags & IR3_INSTR_P) - fprintf(ctx->f, ".p"); - if (instr->flags & IR3_INSTR_S) - fprintf(ctx->f, ".s"); - if (instr->flags & IR3_INSTR_S2EN) - fprintf(ctx->f, ".s2en"); - } -} - -static void 
dump_reg_name(struct ir3_dump_ctx *ctx, - struct ir3_register *reg, bool followssa) -{ - if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) && - (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))) - fprintf(ctx->f, "(absneg)"); - else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)) - fprintf(ctx->f, "(neg)"); - else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) - fprintf(ctx->f, "(abs)"); - - if (reg->flags & IR3_REG_IMMED) { - fprintf(ctx->f, "imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val); - } else if (reg->flags & IR3_REG_SSA) { - if (ctx->verbose) { - fprintf(ctx->f, "_"); - if (followssa) { - fprintf(ctx->f, "["); - dump_instr_name(ctx, reg->instr); - fprintf(ctx->f, "]"); - } - } - } else if (reg->flags & IR3_REG_RELATIV) { - if (reg->flags & IR3_REG_HALF) - fprintf(ctx->f, "h"); - if (reg->flags & IR3_REG_CONST) - fprintf(ctx->f, "c", reg->num); - else - fprintf(ctx->f, "\x1b[0;31mr\x1b[0m (%u)", reg->num, reg->size); - } else { - if (reg->flags & IR3_REG_HALF) - fprintf(ctx->f, "h"); - if (reg->flags & IR3_REG_CONST) - fprintf(ctx->f, "c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]); - else - fprintf(ctx->f, "\x1b[0;31mr%u.%c\x1b[0m", reg_num(reg), "xyzw"[reg_comp(reg)]); - } -} - -static void ir3_instr_dump(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr); -static void ir3_block_dump(struct ir3_dump_ctx *ctx, - struct ir3_block *block, const char *name); - -static void dump_instr(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr) -{ - /* if we've already visited this instruction, bail now: */ - if (ir3_instr_check_mark(instr)) - return; - - /* some meta-instructions need to be handled specially: */ - if (is_meta(instr)) { - if ((instr->opc == OPC_META_FO) || - (instr->opc == OPC_META_FI)) { - struct ir3_instruction *src; - foreach_ssa_src(src, instr) - dump_instr(ctx, src); - } else if (instr->opc == OPC_META_FLOW) { - struct ir3_register *reg = instr->regs[1]; - ir3_block_dump(ctx, instr->flow.if_block, 
"if"); - if (instr->flow.else_block) - ir3_block_dump(ctx, instr->flow.else_block, "else"); - if (reg->flags & IR3_REG_SSA) - dump_instr(ctx, reg->instr); - } else if (instr->opc == OPC_META_PHI) { - /* treat like a normal instruction: */ - ir3_instr_dump(ctx, instr); - } - } else { - ir3_instr_dump(ctx, instr); - } -} - -/* arrarraggh! if link is to something outside of the current block, we - * need to defer emitting the link until the end of the block, since the - * edge triggers pre-creation of the node it links to inside the cluster, - * even though it is meant to be outside.. - */ -static struct { - char buf[40960]; - unsigned n; -} edge_buf; - -/* helper to print or defer: */ -static void printdef(struct ir3_dump_ctx *ctx, - bool defer, const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - if (defer) { - unsigned n = edge_buf.n; - n += vsnprintf(&edge_buf.buf[n], sizeof(edge_buf.buf) - n, - fmt, ap); - edge_buf.n = n; - } else { - vfprintf(ctx->f, fmt, ap); - } - va_end(ap); -} - -static void dump_link2(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr, const char *target, bool defer) -{ - /* some meta-instructions need to be handled specially: */ - if (is_meta(instr)) { - if (instr->opc == OPC_META_INPUT) { - printdef(ctx, defer, "input%lx::w -> %s", - PTRID(instr->inout.block), - instr->regs[0]->num, target); - } else if (instr->opc == OPC_META_FO) { - struct ir3_register *reg = instr->regs[1]; - dump_link2(ctx, reg->instr, target, defer); - printdef(ctx, defer, "[label=\".%c\"]", - "xyzw"[instr->fo.off & 0x3]); - } else if (instr->opc == OPC_META_FI) { - struct ir3_instruction *src; - - foreach_ssa_src_n(src, i, instr) { - dump_link2(ctx, src, target, defer); - printdef(ctx, defer, "[label=\".%c\"]", - "xyzw"[i & 0x3]); - } - } else if (instr->opc == OPC_META_OUTPUT) { - printdef(ctx, defer, "output%lx::w -> %s", - PTRID(instr->inout.block), - instr->regs[0]->num, target); - } else if (instr->opc == OPC_META_PHI) { - /* treat like a normal 
instruction: */ - printdef(ctx, defer, "instr%lx: -> %s", PTRID(instr), target); - } - } else { - printdef(ctx, defer, "instr%lx: -> %s", PTRID(instr), target); - } -} - -static void dump_link(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr, - struct ir3_block *block, const char *target) -{ - bool defer = instr->block != block; - dump_link2(ctx, instr, target, defer); - printdef(ctx, defer, "\n"); -} - -static struct ir3_register *follow_flow(struct ir3_register *reg) -{ - if (reg->flags & IR3_REG_SSA) { - struct ir3_instruction *instr = reg->instr; - /* go with the flow.. */ - if (is_meta(instr) && (instr->opc == OPC_META_FLOW)) - return instr->regs[1]; - } - return reg; -} - -static void ir3_instr_dump(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr) -{ - struct ir3_register *src; - - fprintf(ctx->f, "instr%lx [shape=record,style=filled,fillcolor=lightgrey,label=\"{", - PTRID(instr)); - dump_instr_name(ctx, instr); - - /* destination register: */ - fprintf(ctx->f, "|"); - - /* source register(s): */ - foreach_src_n(src, i, instr) { - struct ir3_register *reg = follow_flow(src); - - fprintf(ctx->f, "|"); - - if (reg->flags & IR3_REG_SSA) - fprintf(ctx->f, " ", i); - - dump_reg_name(ctx, reg, true); - } - - fprintf(ctx->f, "}\"];\n"); - - /* and recursively dump dependent instructions: */ - foreach_src_n(src, i, instr) { - struct ir3_register *reg = follow_flow(src); - char target[32]; /* link target */ - - if (!(reg->flags & IR3_REG_SSA)) - continue; - - snprintf(target, sizeof(target), "instr%lx:", - PTRID(instr), i); - - dump_instr(ctx, reg->instr); - dump_link(ctx, reg->instr, instr->block, target); - } -} - -static void ir3_block_dump(struct ir3_dump_ctx *ctx, - struct ir3_block *block, const char *name) -{ - unsigned i, n; - - n = edge_buf.n; - - fprintf(ctx->f, "subgraph cluster%lx {\n", PTRID(block)); - fprintf(ctx->f, "label=\"%s\";\n", name); - - /* draw inputs: */ - fprintf(ctx->f, "input%lx [shape=record,label=\"inputs", 
PTRID(block)); - for (i = 0; i < block->ninputs; i++) - if (block->inputs[i]) - fprintf(ctx->f, "| i%u.%c", i, (i >> 2), "xyzw"[i & 0x3]); - fprintf(ctx->f, "\"];\n"); - - /* draw instruction graph: */ - for (i = 0; i < block->noutputs; i++) - if (block->outputs[i]) - dump_instr(ctx, block->outputs[i]); - - /* draw outputs: */ - fprintf(ctx->f, "output%lx [shape=record,label=\"outputs", PTRID(block)); - for (i = 0; i < block->noutputs; i++) - fprintf(ctx->f, "| o%u.%c", i, (i >> 2), "xyzw"[i & 0x3]); - fprintf(ctx->f, "\"];\n"); - - /* and links to outputs: */ - for (i = 0; i < block->noutputs; i++) { - char target[32]; /* link target */ - - /* NOTE: there could be outputs that are never assigned, - * so skip them - */ - if (!block->outputs[i]) - continue; - - snprintf(target, sizeof(target), "output%lx::e", - PTRID(block), i); - - dump_link(ctx, block->outputs[i], block, target); - } - - fprintf(ctx->f, "}\n"); - - /* and links to inputs: */ - if (block->parent) { - for (i = 0; i < block->ninputs; i++) { - char target[32]; /* link target */ - - if (!block->inputs[i]) - continue; - - dump_instr(ctx, block->inputs[i]); - - snprintf(target, sizeof(target), "input%lx::e", - PTRID(block), i); - - dump_link(ctx, block->inputs[i], block, target); - } - } - - /* dump deferred edges: */ - if (edge_buf.n > n) { - fprintf(ctx->f, "%*s", edge_buf.n - n, &edge_buf.buf[n]); - edge_buf.n = n; - } -} - -void ir3_dump(struct ir3 *shader, const char *name, - struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? 
*/, - FILE *f) -{ - struct ir3_dump_ctx ctx = { - .f = f, - }; - ir3_clear_mark(shader); - fprintf(ctx.f, "digraph G {\n"); - fprintf(ctx.f, "rankdir=RL;\n"); - fprintf(ctx.f, "nodesep=0.25;\n"); - fprintf(ctx.f, "ranksep=1.5;\n"); - ir3_block_dump(&ctx, block, name); - fprintf(ctx.f, "}\n"); -} - -/* - * For Debugging: - */ - -void -ir3_dump_instr_single(struct ir3_instruction *instr) -{ - struct ir3_dump_ctx ctx = { - .f = stdout, - .verbose = true, - }; - unsigned i; - - dump_instr_name(&ctx, instr); - for (i = 0; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - printf(i ? ", " : " "); - dump_reg_name(&ctx, reg, !!i); - } - - if (instr->address) { - fprintf(ctx.f, ", address=_"); - fprintf(ctx.f, "["); - dump_instr_name(&ctx, instr->address); - fprintf(ctx.f, "]"); - } - - if (instr->fanin) { - fprintf(ctx.f, ", fanin=_"); - fprintf(ctx.f, "["); - dump_instr_name(&ctx, instr->fanin); - fprintf(ctx.f, "]"); - } - - if (is_meta(instr)) { - if (instr->opc == OPC_META_FO) { - printf(", off=%d", instr->fo.off); - } else if ((instr->opc == OPC_META_FI) && instr->fi.aid) { - printf(", aid=%d", instr->fi.aid); - } - } - - printf("\n"); -} - -void -ir3_dump_instr_list(struct ir3_instruction *instr) -{ - struct ir3_block *block = instr->block; - unsigned n = 0; - - while (instr) { - ir3_dump_instr_single(instr); - if (!is_meta(instr)) - n++; - instr = instr->next; - } - printf("%u instructions\n", n); - - for (n = 0; n < block->noutputs; n++) { - if (!block->outputs[n]) - continue; - printf("out%d: ", n); - ir3_dump_instr_single(block->outputs[n]); - } -} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c new file mode 100644 index 00000000000..a5c5d3c8efa --- /dev/null +++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c @@ -0,0 +1,210 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby 
granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include +#include + +#include "ir3.h" + +#define PTRID(x) ((unsigned long)(x)) + +static void print_instr_name(struct ir3_instruction *instr) +{ +#ifdef DEBUG + printf("%04u:", instr->serialno); +#endif + printf("%03u: ", instr->depth); + + if (instr->flags & IR3_INSTR_SY) + printf("(sy)"); + if (instr->flags & IR3_INSTR_SS) + printf("(ss)"); + + if (is_meta(instr)) { + switch(instr->opc) { + case OPC_META_PHI: + printf("Φ"); + break; + default: + /* shouldn't hit here.. 
just for debugging: */ + switch (instr->opc) { + case OPC_META_INPUT: printf("_meta:in"); break; + case OPC_META_OUTPUT: printf("_meta:out"); break; + case OPC_META_FO: printf("_meta:fo"); break; + case OPC_META_FI: printf("_meta:fi"); break; + case OPC_META_FLOW: printf("_meta:flow"); break; + + default: printf("_meta:%d", instr->opc); break; + } + break; + } + } else if (instr->category == 1) { + static const char *type[] = { + [TYPE_F16] = "f16", + [TYPE_F32] = "f32", + [TYPE_U16] = "u16", + [TYPE_U32] = "u32", + [TYPE_S16] = "s16", + [TYPE_S32] = "s32", + [TYPE_U8] = "u8", + [TYPE_S8] = "s8", + }; + if (instr->cat1.src_type == instr->cat1.dst_type) + printf("mov"); + else + printf("cov"); + printf(".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]); + } else { + printf("%s", ir3_instr_name(instr)); + if (instr->flags & IR3_INSTR_3D) + printf(".3d"); + if (instr->flags & IR3_INSTR_A) + printf(".a"); + if (instr->flags & IR3_INSTR_O) + printf(".o"); + if (instr->flags & IR3_INSTR_P) + printf(".p"); + if (instr->flags & IR3_INSTR_S) + printf(".s"); + if (instr->flags & IR3_INSTR_S2EN) + printf(".s2en"); + } +} + +static void print_reg_name(struct ir3_register *reg, bool followssa) +{ + if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) && + (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))) + printf("(absneg)"); + else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)) + printf("(neg)"); + else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) + printf("(abs)"); + + if (reg->flags & IR3_REG_IMMED) { + printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val); + } else if (reg->flags & IR3_REG_SSA) { + printf("_"); + if (followssa) { + printf("["); + print_instr_name(reg->instr); + printf("]"); + } + } else if (reg->flags & IR3_REG_RELATIV) { + if (reg->flags & IR3_REG_HALF) + printf("h"); + if (reg->flags & IR3_REG_CONST) + printf("c", reg->num); + else + printf("\x1b[0;31mr\x1b[0m (%u)", reg->num, reg->size); + } else { + 
if (reg->flags & IR3_REG_HALF) + printf("h"); + if (reg->flags & IR3_REG_CONST) + printf("c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]); + else + printf("\x1b[0;31mr%u.%c\x1b[0m", reg_num(reg), "xyzw"[reg_comp(reg)]); + } +} + +static void +tab(int lvl) +{ + for (int i = 0; i < lvl; i++) + printf("\t"); +} + +static void +print_instr(struct ir3_instruction *instr, int lvl) +{ + unsigned i; + + tab(lvl); + + print_instr_name(instr); + for (i = 0; i < instr->regs_count; i++) { + struct ir3_register *reg = instr->regs[i]; + printf(i ? ", " : " "); + print_reg_name(reg, !!i); + } + + if (instr->address) { + printf(", address=_"); + printf("["); + print_instr_name(instr->address); + printf("]"); + } + + if (instr->fanin) { + printf(", fanin=_"); + printf("["); + print_instr_name(instr->fanin); + printf("]"); + } + + if (is_meta(instr)) { + if (instr->opc == OPC_META_FO) { + printf(", off=%d", instr->fo.off); + } else if ((instr->opc == OPC_META_FI) && instr->fi.aid) { + printf(", aid=%d", instr->fi.aid); + } + } + + printf("\n"); +} + +void ir3_print_instr(struct ir3_instruction *instr) +{ + print_instr(instr, 0); +} + +static void +print_block(struct ir3_block *block, int lvl) +{ + struct ir3_instruction *instr; + tab(lvl); printf("block {\n"); + for (instr = block->head; instr; instr = instr->next) { + print_instr(instr, lvl+1); + } + tab(lvl); printf("}\n"); +} + +void +ir3_print(struct ir3 *ir) +{ + struct ir3_block *block = ir->block; + + print_block(block, 0); + + for (unsigned i = 0; i < block->noutputs; i++) { + if (!block->outputs[i]) + continue; + printf("out%d: ", i); + print_instr(block->outputs[i], 0); + } +} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index a4235a77a15..501352515b5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -78,14 +78,14 @@ struct ir3_ra_ctx { #define ra_dump_list(msg, n) do { \ if (ra_debug) { \ debug_printf("-- " 
msg); \ - ir3_dump_instr_list(n); \ + ir3_print(n->block->shader); \ } \ } while (0) #define ra_dump_instr(msg, n) do { \ if (ra_debug) { \ debug_printf(">> " msg); \ - ir3_dump_instr_single(n); \ + ir3_print_instr(n); \ } \ } while (0) From adf1659ff5f07d907eca552be3b566e408c8601e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 30 Apr 2015 11:38:43 -0400 Subject: [PATCH 756/834] freedreno/ir3: use standard list implementation Use standard list_head double-linked list and related iterators, helpers, etc, rather than weird combo of instruction array and next pointers depending on stage. Now block has an instrs_list. In certain stages where we want to remove and re-add to the blocks list we just use list_replace() to copy the list to a new list_head. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.c | 27 +++--- src/gallium/drivers/freedreno/ir3/ir3.h | 15 ++-- src/gallium/drivers/freedreno/ir3/ir3_cp.c | 19 +--- src/gallium/drivers/freedreno/ir3/ir3_depth.c | 66 +++++++------- .../drivers/freedreno/ir3/ir3_legalize.c | 65 ++++++-------- src/gallium/drivers/freedreno/ir3/ir3_print.c | 3 +- src/gallium/drivers/freedreno/ir3/ir3_ra.c | 87 ++++++++++++------- src/gallium/drivers/freedreno/ir3/ir3_sched.c | 86 +++++------------- 8 files changed, 160 insertions(+), 208 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index e015de91c33..84564a9eef7 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -81,7 +81,7 @@ void ir3_destroy(struct ir3 *shader) shader->chunk = chunk->next; free(chunk); } - free(shader->instrs); + free(shader->indirects); free(shader->baryfs); free(shader); } @@ -534,28 +534,32 @@ static int (*emit[])(struct ir3_instruction *instr, void *ptr, void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, uint32_t gpu_id) { + struct ir3_block *block = shader->block; uint32_t *ptr, *dwords; - uint32_t i; info->max_reg = -1; 
info->max_half_reg = -1; info->max_const = -1; info->instrs_count = 0; + info->sizedwords = 0; + + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + info->sizedwords += 2; + } /* need a integer number of instruction "groups" (sets of 16 * instructions on a4xx or sets of 4 instructions on a3xx), * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits) */ if (gpu_id >= 400) { - info->sizedwords = 2 * align(shader->instrs_count, 16); + info->sizedwords = align(info->sizedwords, 16 * 2); } else { - info->sizedwords = 2 * align(shader->instrs_count, 4); + info->sizedwords = align(info->sizedwords, 4 * 2); } ptr = dwords = calloc(4, info->sizedwords); - for (i = 0; i < shader->instrs_count; i++) { - struct ir3_instruction *instr = shader->instrs[i]; + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { int ret = emit[instr->category](instr, dwords, info); if (ret) goto fail; @@ -581,14 +585,15 @@ static struct ir3_register * reg_create(struct ir3 *shader, return reg; } -static void insert_instr(struct ir3 *shader, +static void insert_instr(struct ir3_block *block, struct ir3_instruction *instr) { + struct ir3 *shader = block->shader; #ifdef DEBUG static uint32_t serialno = 0; instr->serialno = ++serialno; #endif - array_insert(shader->instrs, instr); + list_addtail(&instr->node, &block->instr_list); if (is_input(instr)) array_insert(shader->baryfs, instr); @@ -625,6 +630,8 @@ struct ir3_block * ir3_block_create(struct ir3 *shader, block->shader = shader; + list_inithead(&block->instr_list); + return block; } @@ -652,7 +659,7 @@ struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, instr->block = block; instr->category = category; instr->opc = opc; - insert_instr(block->shader, instr); + insert_instr(block, instr); return instr; } @@ -677,7 +684,7 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) *new_instr = *instr; new_instr->regs = regs; - 
insert_instr(instr->block->shader, new_instr); + insert_instr(instr->block, new_instr); /* clone registers: */ new_instr->regs_count = 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index f37dfab3341..edb5b49e23c 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -28,6 +28,7 @@ #include #include "util/u_debug.h" +#include "util/list.h" #include "instr-a3xx.h" #include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */ @@ -290,7 +291,9 @@ struct ir3_instruction { */ struct ir3_instruction *fanin; - struct ir3_instruction *next; + /* Entry in ir3_block's instruction list: */ + struct list_head node; + #ifdef DEBUG uint32_t serialno; #endif @@ -321,8 +324,6 @@ static inline int ir3_neighbor_count(struct ir3_instruction *instr) struct ir3_heap_chunk; struct ir3 { - unsigned instrs_count, instrs_sz; - struct ir3_instruction **instrs; /* Track bary.f (and ldlv) instructions.. this is needed in * scheduling to ensure that all varying fetches happen before @@ -361,7 +362,7 @@ struct ir3_block { /* only a single address register: */ struct ir3_instruction *address; struct ir3_block *parent; - struct ir3_instruction *head; + struct list_head instr_list; }; struct ir3 * ir3_create(void); @@ -402,11 +403,8 @@ static inline void ir3_clear_mark(struct ir3 *shader) * a block, so tracking the list of instrs globally is * unlikely to be what we want. 
*/ - unsigned i; - for (i = 0; i < shader->instrs_count; i++) { - struct ir3_instruction *instr = shader->instrs[i]; + list_for_each_entry (struct ir3_instruction, instr, &shader->block->instr_list, node) instr->flags &= ~IR3_INSTR_MARK; - } } static inline int ir3_instr_regno(struct ir3_instruction *instr, @@ -756,6 +754,7 @@ int ir3_block_flatten(struct ir3_block *block); /* depth calculation: */ int ir3_delayslots(struct ir3_instruction *assigner, struct ir3_instruction *consumer, unsigned n); +void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list); void ir3_block_depth(struct ir3_block *block); /* copy-propagate: */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index fa7d363be7b..350f7dd5e6b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -354,13 +354,6 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags) { struct ir3_register *reg; - /* stay within the block.. 
don't try to operate across - * basic block boundaries or we'll have problems when - * dealing with multiple basic blocks: - */ - if (is_meta(instr) && (instr->opc == OPC_META_INPUT)) - return instr; - if (is_eligible_mov(instr, !!flags)) { struct ir3_register *reg = instr->regs[1]; struct ir3_instruction *src_instr = ssa(reg); @@ -394,11 +387,11 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags) return instr; } -static void block_cp(struct ir3_block *block) +void ir3_block_cp(struct ir3_block *block) { - unsigned i; + ir3_clear_mark(block->shader); - for (i = 0; i < block->noutputs; i++) { + for (unsigned i = 0; i < block->noutputs; i++) { if (block->outputs[i]) { struct ir3_instruction *out = instr_cp(block->outputs[i], NULL); @@ -407,9 +400,3 @@ static void block_cp(struct ir3_block *block) } } } - -void ir3_block_cp(struct ir3_block *block) -{ - ir3_clear_mark(block->shader); - block_cp(block); -} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index b899c66b37e..601e14a1c85 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -84,25 +84,25 @@ int ir3_delayslots(struct ir3_instruction *assigner, } } -static void insert_by_depth(struct ir3_instruction *instr) +void +ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list) { - struct ir3_block *block = instr->block; - struct ir3_instruction *n = block->head; - struct ir3_instruction *p = NULL; + /* remove from existing spot in list: */ + list_delinit(&instr->node); - while (n && (n != instr) && (n->depth > instr->depth)) { - p = n; - n = n->next; + /* find where to re-insert instruction: */ + list_for_each_entry (struct ir3_instruction, pos, list, node) { + if (pos->depth > instr->depth) { + list_add(&instr->node, &pos->node); + return; + } } - - instr->next = n; - if (p) - p->next = instr; - else - block->head = instr; + /* if we get here, we didn't find an insertion 
spot: */ + list_addtail(&instr->node, list); } -static void ir3_instr_depth(struct ir3_instruction *instr) +static void +ir3_instr_depth(struct ir3_instruction *instr) { struct ir3_instruction *src; @@ -123,42 +123,38 @@ static void ir3_instr_depth(struct ir3_instruction *instr) instr->depth = MAX2(instr->depth, sd); } - /* meta-instructions don't add cycles, other than PHI.. which - * might translate to a real instruction.. - * - * well, not entirely true, fan-in/out, etc might need to need - * to generate some extra mov's in edge cases, etc.. probably - * we might want to do depth calculation considering the worst - * case for these?? - */ if (!is_meta(instr)) instr->depth++; - insert_by_depth(instr); + ir3_insert_by_depth(instr, &instr->block->instr_list); +} + +static void +remove_unused_by_block(struct ir3_block *block) +{ + list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) { + if (!ir3_instr_check_mark(instr)) { + /* mark it, in case it is input, so we can + * remove unused inputs: + */ + instr->depth = DEPTH_UNUSED; + /* and remove from instruction list: */ + list_delinit(&instr->node); + } + } } void ir3_block_depth(struct ir3_block *block) { unsigned i; - block->head = NULL; - ir3_clear_mark(block->shader); for (i = 0; i < block->noutputs; i++) if (block->outputs[i]) ir3_instr_depth(block->outputs[i]); /* mark un-used instructions: */ - for (i = 0; i < block->shader->instrs_count; i++) { - struct ir3_instruction *instr = block->shader->instrs[i]; - - /* just consider instructions within this block: */ - if (instr->block != block) - continue; - - if (!ir3_instr_check_mark(instr)) - instr->depth = DEPTH_UNUSED; - } + remove_unused_by_block(block); /* cleanup unused inputs: */ for (i = 0; i < block->ninputs; i++) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index 61713c25e72..be0b5ce442c 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c +++ 
b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -51,12 +51,9 @@ struct ir3_legalize_ctx { static void legalize(struct ir3_legalize_ctx *ctx) { struct ir3_block *block = ctx->block; - struct ir3_instruction *n; - struct ir3 *shader = block->shader; - struct ir3_instruction *end = - ir3_instr_create(block, 0, OPC_END); struct ir3_instruction *last_input = NULL; struct ir3_instruction *last_rel = NULL; + struct list_head instr_list; regmask_t needs_ss_war; /* write after read */ regmask_t needs_ss; regmask_t needs_sy; @@ -65,9 +62,13 @@ static void legalize(struct ir3_legalize_ctx *ctx) regmask_init(&needs_ss); regmask_init(&needs_sy); - shader->instrs_count = 0; + /* remove all the instructions from the list, we'll be adding + * them back in as we go + */ + list_replace(&block->instr_list, &instr_list); + list_inithead(&block->instr_list); - for (n = block->head; n; n = n->next) { + list_for_each_entry_safe (struct ir3_instruction, n, &instr_list, node) { struct ir3_register *reg; unsigned i; @@ -140,12 +141,12 @@ static void legalize(struct ir3_legalize_ctx *ctx) } /* need to be able to set (ss) on first instruction: */ - if ((shader->instrs_count == 0) && (n->category >= 5)) + if (list_empty(&block->instr_list) && (n->category >= 5)) ir3_NOP(block); - if (is_nop(n) && shader->instrs_count) { - struct ir3_instruction *last = - shader->instrs[shader->instrs_count-1]; + if (is_nop(n) && !list_empty(&block->instr_list)) { + struct ir3_instruction *last = list_last_entry(&block->instr_list, + struct ir3_instruction, node); if (is_nop(last) && (last->repeat < 5)) { last->repeat++; last->flags |= n->flags; @@ -153,7 +154,7 @@ static void legalize(struct ir3_legalize_ctx *ctx) } } - shader->instrs[shader->instrs_count++] = n; + list_addtail(&n->node, &block->instr_list); if (is_sfu(n)) regmask_set(&needs_ss, n->regs[0]); @@ -192,35 +193,19 @@ static void legalize(struct ir3_legalize_ctx *ctx) * the (ei) flag: */ if (is_mem(last_input) && (last_input->opc == 
OPC_LDLV)) { - int i, cnt; + struct ir3_instruction *baryf; - /* note that ir3_instr_create() inserts into - * shader->instrs[] and increments the count.. - * so we need to bump up the cnt initially (to - * avoid it clobbering the last real instr) and - * restore it after. - */ - cnt = ++shader->instrs_count; + /* (ss)bary.f (ei)r63.x, 0, r0.x */ + baryf = ir3_instr_create(block, 2, OPC_BARY_F); + baryf->flags |= IR3_INSTR_SS; + ir3_reg_create(baryf, regid(63, 0), 0); + ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0; + ir3_reg_create(baryf, regid(0, 0), 0); - /* inserting instructions would be a bit nicer if list.. */ - for (i = cnt - 2; i >= 0; i--) { - if (shader->instrs[i] == last_input) { + /* insert the dummy bary.f after last_input: */ + list_add(&baryf->node, &last_input->node); - /* (ss)bary.f (ei)r63.x, 0, r0.x */ - last_input = ir3_instr_create(block, 2, OPC_BARY_F); - last_input->flags |= IR3_INSTR_SS; - ir3_reg_create(last_input, regid(63, 0), 0); - ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0; - ir3_reg_create(last_input, regid(0, 0), 0); - - shader->instrs[i + 1] = last_input; - - break; - } - shader->instrs[i + 1] = shader->instrs[i]; - } - - shader->instrs_count = cnt; + last_input = baryf; } last_input->regs[0]->flags |= IR3_REG_EI; } @@ -228,9 +213,11 @@ static void legalize(struct ir3_legalize_ctx *ctx) if (last_rel) last_rel->flags |= IR3_INSTR_UL; - shader->instrs[shader->instrs_count++] = end; + /* create/add 'end' instruction: */ + ir3_instr_create(block, 0, OPC_END); - shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY; + list_first_entry(&block->instr_list, struct ir3_instruction, node) + ->flags |= IR3_INSTR_SS | IR3_INSTR_SY; } void ir3_block_legalize(struct ir3_block *block, diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c index a5c5d3c8efa..755c0c23c36 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_print.c +++ 
b/src/gallium/drivers/freedreno/ir3/ir3_print.c @@ -186,9 +186,8 @@ void ir3_print_instr(struct ir3_instruction *instr) static void print_block(struct ir3_block *block, int lvl) { - struct ir3_instruction *instr; tab(lvl); printf("block {\n"); - for (instr = block->head; instr; instr = instr->next) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { print_instr(instr, lvl+1); } tab(lvl); printf("}\n"); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 501352515b5..95f6a81861e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -75,10 +75,10 @@ struct ir3_ra_ctx { # define ra_debug 0 #endif -#define ra_dump_list(msg, n) do { \ +#define ra_dump_list(msg, ir) do { \ if (ra_debug) { \ debug_printf("-- " msg); \ - ir3_print(n->block->shader); \ + ir3_print(ir); \ } \ } while (0) @@ -175,14 +175,13 @@ static void mark_sources(struct ir3_instruction *instr, static void compute_liveregs(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, regmask_t *liveregs) { - struct ir3_block *block = instr->block; - struct ir3_instruction *n; + struct ir3_block *block = ctx->block; regmask_t written; unsigned i; regmask_init(&written); - for (n = instr->next; n; n = n->next) { + list_for_each_entry (struct ir3_instruction, n, &instr->node, node) { struct ir3_register *r; if (is_meta(n)) @@ -411,9 +410,8 @@ static void instr_assign_src(struct ir3_ra_ctx *ctx, static void instr_assign_srcs(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned name) { - struct ir3_instruction *n, *src; - - for (n = instr->next; n && !ctx->error; n = n->next) { + list_for_each_entry (struct ir3_instruction, n, &instr->node, node) { + struct ir3_instruction *src; foreach_ssa_src_n(src, i, n) { unsigned r = i + 1; @@ -424,6 +422,8 @@ static void instr_assign_srcs(struct ir3_ra_ctx *ctx, if (src == instr) instr_assign_src(ctx, n, r, name); } + if 
(ctx->error) + break; } } @@ -589,14 +589,45 @@ static void instr_assign_array(struct ir3_ra_ctx *ctx, } -static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) +static bool +block_ra(struct ir3_block *block, void *state) { - struct ir3_instruction *n; + struct ir3_ra_ctx *ctx = state; + ra_dump_list("-------\n", block->shader); + + /* first pass, assign arrays: */ + list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) { + if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) { + debug_assert(!n->cp.left); /* don't think this should happen */ + ra_dump_instr("ASSIGN ARRAY: ", n); + instr_assign_array(ctx, n); + ra_dump_list("-------\n", block->shader); + } + + if (ctx->error) + return false; + } + + list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) { + ra_dump_instr("ASSIGN: ", n); + instr_alloc_and_assign(ctx, ir3_neighbor_first(n)); + ra_dump_list("-------\n", block->shader); + + if (ctx->error) + return false; + } + + return true; +} + +static int +shader_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ /* frag shader inputs get pre-assigned, since we have some * constraints/unknowns about setup for some of these regs: */ - if ((ctx->type == SHADER_FRAGMENT) && !block->parent) { + if (ctx->type == SHADER_FRAGMENT) { unsigned i = 0, j; if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) { /* if we have frag_face, it gets hr0.x */ @@ -608,31 +639,23 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) instr_assign(ctx, block->inputs[i], j); } - ra_dump_list("-------\n", block->head); - - /* first pass, assign arrays: */ - for (n = block->head; n && !ctx->error; n = n->next) { - if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) { - debug_assert(!n->cp.left); /* don't think this should happen */ - ra_dump_instr("ASSIGN ARRAY: ", n); - instr_assign_array(ctx, n); - ra_dump_list("-------\n", block->head); - } - } - - for (n = block->head; n && !ctx->error; n = 
n->next) { - ra_dump_instr("ASSIGN: ", n); - instr_alloc_and_assign(ctx, ir3_neighbor_first(n)); - ra_dump_list("-------\n", block->head); - } + block_ra(block, ctx); return ctx->error ? -1 : 0; } +static bool +block_mark_dst(struct ir3_block *block, void *state) +{ + list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) + if (n->regs_count > 0) + n->regs[0]->flags |= IR3_REG_SSA; + return true; +} + int ir3_block_ra(struct ir3_block *block, enum shader_t type, bool frag_coord, bool frag_face) { - struct ir3_instruction *n; struct ir3_ra_ctx ctx = { .block = block, .type = type, @@ -648,12 +671,10 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type, * NOTE: we really should set SSA flag consistently on * every dst register in the frontend. */ - for (n = block->head; n; n = n->next) - if (n->regs_count > 0) - n->regs[0]->flags |= IR3_REG_SSA; + block_mark_dst(block, &ctx); ir3_clear_mark(block->shader); - ret = block_ra(&ctx, block); + ret = shader_ra(&ctx, block); return ret; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index 5ca6d7b62d5..fc41f93b884 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -88,26 +88,21 @@ deepest(struct ir3_instruction **srcs, unsigned nsrcs) return d; } -static unsigned distance(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr, unsigned maxd) +static unsigned +distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr, + unsigned maxd) { - struct ir3_instruction *n = ctx->scheduled; + struct list_head *instr_list = &instr->block->instr_list; unsigned d = 0; - while (n && (n != instr) && (d < maxd)) { + + list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) { + if ((n == instr) || (d >= maxd)) + break; if (is_alu(n) || is_flow(n)) d++; - n = n->next; } - return d; -} -/* TODO maybe we want double linked list? 
*/ -static struct ir3_instruction * prev(struct ir3_instruction *instr) -{ - struct ir3_instruction *p = instr->block->head; - while (p && (p->next != instr)) - p = p->next; - return p; + return d; } static bool is_sfu_or_mem(struct ir3_instruction *instr) @@ -125,25 +120,11 @@ static void schedule(struct ir3_sched_ctx *ctx, * scheduling and depth calculation.. */ if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr)) - schedule(ctx, ir3_NOP(block), false); + ir3_NOP(block); /* remove from depth list: */ - if (remove) { - struct ir3_instruction *p = prev(instr); - - /* NOTE: this can happen for inputs which are not - * read.. in that case there is no need to schedule - * the input, so just bail: - */ - if (instr != (p ? p->next : block->head)) - return; - - if (p) - p->next = instr->next; - else - block->head = instr->next; - } + list_delinit(&instr->node); if (writes_addr(instr)) { assert(ctx->addr == NULL); @@ -157,7 +138,7 @@ static void schedule(struct ir3_sched_ctx *ctx, instr->flags |= IR3_INSTR_MARK; - instr->next = ctx->scheduled; + list_addtail(&instr->node, &instr->block->instr_list); ctx->scheduled = instr; ctx->cnt++; @@ -284,18 +265,6 @@ static int trysched(struct ir3_sched_ctx *ctx, return SCHEDULED; } -static struct ir3_instruction * reverse(struct ir3_instruction *instr) -{ - struct ir3_instruction *reversed = NULL; - while (instr) { - struct ir3_instruction *next = instr->next; - instr->next = reversed; - reversed = instr; - instr = next; - } - return reversed; -} - static bool uses_current_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) { @@ -317,16 +286,14 @@ static bool uses_current_pred(struct ir3_sched_ctx *ctx, * other instructions using the current address register: */ static int block_sched_undelayed(struct ir3_sched_ctx *ctx, - struct ir3_block *block) + struct list_head *unscheduled_list) { - struct ir3_instruction *instr = block->head; bool addr_in_use = false; bool pred_in_use = false; bool 
all_delayed = true; unsigned cnt = ~0, attempted = 0; - while (instr) { - struct ir3_instruction *next = instr->next; + list_for_each_entry_safe(struct ir3_instruction, instr, unscheduled_list, node) { bool addr = uses_current_addr(ctx, instr); bool pred = uses_current_pred(ctx, instr); @@ -347,8 +314,6 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx, attempted++; } - - instr = next; } if (!addr_in_use) @@ -408,7 +373,10 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx, static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block) { - struct ir3_instruction *instr; + struct list_head unscheduled_list; + + list_replace(&block->instr_list, &unscheduled_list); + list_inithead(&block->instr_list); /* schedule all the shader input's (meta-instr) first so that * the RA step sees that the input registers contain a value @@ -423,31 +391,22 @@ static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block) } } - while ((instr = block->head) && !ctx->error) { - /* NOTE: always grab next *before* trysched(), in case the - * instruction is actually scheduled (and therefore moved - * from depth list into scheduled list) - */ - struct ir3_instruction *next = instr->next; + list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { int cnt = trysched(ctx, instr); if (cnt == DELAYED) - cnt = block_sched_undelayed(ctx, block); + cnt = block_sched_undelayed(ctx, &unscheduled_list); /* -1 is signal to return up stack, but to us means same as 0: */ cnt = MAX2(0, cnt); cnt += ctx->cnt; - instr = next; /* if deepest remaining instruction cannot be scheduled, try * the increasingly more shallow instructions until needed * number of delay slots is filled: */ - while (instr && (cnt > ctx->cnt)) { - next = instr->next; + list_for_each_entry_safe (struct ir3_instruction, instr, &instr->node, node) trysched(ctx, instr); - instr = next; - } /* and if we run out of instructions that can be scheduled, * then it is time 
for nop's: @@ -455,9 +414,6 @@ static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block) while (cnt > ctx->cnt) schedule(ctx, ir3_NOP(block), false); } - - /* at this point, scheduled list is in reverse order, so fix that: */ - block->head = reverse(ctx->scheduled); } int ir3_block_sched(struct ir3_block *block) From 7273cb4e933f8be65fc73b9d8c69c76d1078cb14 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 30 Apr 2015 13:57:15 -0400 Subject: [PATCH 757/834] freedreno/ir3/sched: convert to priority queue Use a more standard priority-queue based scheduling algo. It is simpler and will make things easier once we have multiple basic blocks and flow control. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.c | 1 + src/gallium/drivers/freedreno/ir3/ir3.h | 3 + .../drivers/freedreno/ir3/ir3_compiler_nir.c | 1 + src/gallium/drivers/freedreno/ir3/ir3_sched.c | 506 +++++++++--------- 4 files changed, 262 insertions(+), 249 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 84564a9eef7..aea1b967b07 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -82,6 +82,7 @@ void ir3_destroy(struct ir3 *shader) free(chunk); } free(shader->indirects); + free(shader->predicates); free(shader->baryfs); free(shader); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index edb5b49e23c..030a74fe21a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -346,6 +346,9 @@ struct ir3 { */ unsigned indirects_count, indirects_sz; struct ir3_instruction **indirects; + /* and same for instructions that consume predicate register: */ + unsigned predicates_count, predicates_sz; + struct ir3_instruction **predicates; struct ir3_block *block; unsigned heap_idx; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 
b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 8d382e5cf3e..caea34c7fd4 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1250,6 +1250,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) cond->regs[0]->num = regid(REG_P0, 0); kill = ir3_KILL(b, cond, 0); + array_insert(ctx->ir->predicates, kill); ctx->kill[ctx->kill_count++] = kill; ctx->so->has_kill = true; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index fc41f93b884..1d166d879df 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -31,23 +31,14 @@ #include "ir3.h" -enum { - SCHEDULED = -1, - DELAYED = -2, -}; - /* * Instruction Scheduling: * - * Using the depth sorted list from depth pass, attempt to recursively - * schedule deepest unscheduled path. The first instruction that cannot - * be scheduled, returns the required delay slots it needs, at which - * point we return back up to the top and attempt to schedule by next - * highest depth. After a sufficient number of instructions have been - * scheduled, return back to beginning of list and start again. If you - * reach the end of depth sorted list without being able to insert any - * instruction, insert nop's. Repeat until no more unscheduled - * instructions. + * A priority-queue based scheduling algo. Add eligible instructions, + * ie. ones with all their dependencies scheduled, to the priority + * (depth) sorted queue (list). Pop highest priority instruction off + * the queue and schedule it, add newly eligible instructions to the + * priority queue, rinse, repeat. * * There are a few special cases that need to be handled, since sched * is currently independent of register allocation. 
Usages of address @@ -60,67 +51,29 @@ enum { */ struct ir3_sched_ctx { - struct ir3_instruction *scheduled; /* last scheduled instr */ + struct ir3_block *block; /* the current block */ + struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/ struct ir3_instruction *addr; /* current a0.x user, if any */ struct ir3_instruction *pred; /* current p0.x user, if any */ - unsigned cnt; bool error; }; -static struct ir3_instruction * -deepest(struct ir3_instruction **srcs, unsigned nsrcs) -{ - struct ir3_instruction *d = NULL; - unsigned i = 0, id = 0; - - while ((i < nsrcs) && !(d = srcs[id = i])) - i++; - - if (!d) - return NULL; - - for (; i < nsrcs; i++) - if (srcs[i] && (srcs[i]->depth > d->depth)) - d = srcs[id = i]; - - srcs[id] = NULL; - - return d; -} - -static unsigned -distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr, - unsigned maxd) -{ - struct list_head *instr_list = &instr->block->instr_list; - unsigned d = 0; - - list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) { - if ((n == instr) || (d >= maxd)) - break; - if (is_alu(n) || is_flow(n)) - d++; - } - - return d; -} - static bool is_sfu_or_mem(struct ir3_instruction *instr) { return is_sfu(instr) || is_mem(instr); } -static void schedule(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr, bool remove) +static void +schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) { - struct ir3_block *block = instr->block; + debug_assert(ctx->block == instr->block); /* maybe there is a better way to handle this than just stuffing * a nop.. ideally we'd know about this constraint in the * scheduling and depth calculation.. 
*/ if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr)) - ir3_NOP(block); + ir3_NOP(ctx->block); /* remove from depth list: */ @@ -140,16 +93,28 @@ static void schedule(struct ir3_sched_ctx *ctx, list_addtail(&instr->node, &instr->block->instr_list); ctx->scheduled = instr; - - ctx->cnt++; } -/* - * Delay-slot calculation. Follows fanin/fanout. - */ +static unsigned +distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr, + unsigned maxd) +{ + struct list_head *instr_list = &ctx->block->instr_list; + unsigned d = 0; + + list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) { + if ((n == instr) || (d >= maxd)) + break; + if (is_alu(n) || is_flow(n)) + d++; + } + + return d; +} /* calculate delay for specified src: */ -static unsigned delay_calc_srcn(struct ir3_sched_ctx *ctx, +static unsigned +delay_calc_srcn(struct ir3_sched_ctx *ctx, struct ir3_instruction *assigner, struct ir3_instruction *consumer, unsigned srcn) { @@ -158,7 +123,10 @@ static unsigned delay_calc_srcn(struct ir3_sched_ctx *ctx, if (is_meta(assigner)) { struct ir3_instruction *src; foreach_ssa_src(src, assigner) { - unsigned d = delay_calc_srcn(ctx, src, consumer, srcn); + unsigned d; + if (src->block != assigner->block) + break; + d = delay_calc_srcn(ctx, src, consumer, srcn); delay = MAX2(delay, d); } } else { @@ -170,48 +138,77 @@ static unsigned delay_calc_srcn(struct ir3_sched_ctx *ctx, } /* calculate delay for instruction (maximum of delay for all srcs): */ -static unsigned delay_calc(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr) +static unsigned +delay_calc(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) { unsigned delay = 0; struct ir3_instruction *src; foreach_ssa_src_n(src, i, instr) { - unsigned d = delay_calc_srcn(ctx, src, instr, i); + unsigned d; + if (src->block != instr->block) + continue; + d = delay_calc_srcn(ctx, src, instr, i); delay = MAX2(delay, d); } return delay; } -/* A negative return value signals 
that an instruction has been newly - * SCHEDULED (or DELAYED due to address or predicate register already - * in use), return back up to the top of the stack (to block_sched()) - */ -static int trysched(struct ir3_sched_ctx *ctx, +struct ir3_sched_notes { + /* there is at least one kill which could be scheduled, except + * for unscheduled bary.f's: + */ + bool blocked_kill; + /* there is at least one instruction that could be scheduled, + * except for conflicting address/predicate register usage: + */ + bool addr_conflict, pred_conflict; +}; + +static bool is_scheduled(struct ir3_instruction *instr) +{ + return !!(instr->flags & IR3_INSTR_MARK); +} + +static bool +check_conflict(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, struct ir3_instruction *instr) { - struct ir3_instruction *srcs[64]; - struct ir3_instruction *src; - unsigned delay, nsrcs = 0; - - /* if already scheduled: */ - if (instr->flags & IR3_INSTR_MARK) - return 0; - - /* figure out our src's, copy 'em out into an array for sorting: */ - foreach_ssa_src(src, instr) { - debug_assert(nsrcs < ARRAY_SIZE(srcs)); - srcs[nsrcs++] = src; + /* if this is a write to address/predicate register, and that + * register is currently in use, we need to defer until it is + * free: + */ + if (writes_addr(instr) && ctx->addr) { + assert(ctx->addr != instr); + notes->addr_conflict = true; + return true; } - /* for each src register in sorted order: - */ - delay = 0; - while ((src = deepest(srcs, nsrcs))) { - delay = trysched(ctx, src); - if (delay) - return delay; + if (writes_pred(instr) && ctx->pred) { + assert(ctx->pred != instr); + notes->pred_conflict = true; + return true; + } + + return false; +} + +/* is this instruction ready to be scheduled? Return negative for not + * ready (updating notes if needed), or >= 0 to indicate number of + * delay slots needed. 
+ */ +static int +instr_eligibility(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, + struct ir3_instruction *instr) +{ + struct ir3_instruction *src; + unsigned delay = 0; + + foreach_ssa_src(src, instr) { + /* if dependency not scheduled, we aren't ready yet: */ + if (!is_scheduled(src)) + return -1; } /* all our dependents are scheduled, figure out if @@ -236,183 +233,194 @@ static int trysched(struct ir3_sched_ctx *ctx, */ if (is_kill(instr)) { struct ir3 *ir = instr->block->shader; - unsigned i; - for (i = 0; i < ir->baryfs_count; i++) { + for (unsigned i = 0; i < ir->baryfs_count; i++) { struct ir3_instruction *baryf = ir->baryfs[i]; if (baryf->depth == DEPTH_UNUSED) continue; - delay = trysched(ctx, baryf); - if (delay) - return delay; - } - } - - /* if this is a write to address/predicate register, and that - * register is currently in use, we need to defer until it is - * free: - */ - if (writes_addr(instr) && ctx->addr) { - assert(ctx->addr != instr); - return DELAYED; - } - if (writes_pred(instr) && ctx->pred) { - assert(ctx->pred != instr); - return DELAYED; - } - - schedule(ctx, instr, true); - return SCHEDULED; -} - -static bool uses_current_addr(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr) -{ - return instr->address && (ctx->addr == instr->address); -} - -static bool uses_current_pred(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr) -{ - struct ir3_instruction *src; - foreach_ssa_src(src, instr) - if (ctx->pred == src) - return true; - return false; -} - -/* when we encounter an instruction that writes to the address register - * when it is in use, we delay that instruction and try to schedule all - * other instructions using the current address register: - */ -static int block_sched_undelayed(struct ir3_sched_ctx *ctx, - struct list_head *unscheduled_list) -{ - bool addr_in_use = false; - bool pred_in_use = false; - bool all_delayed = true; - unsigned cnt = ~0, attempted = 0; - - list_for_each_entry_safe(struct 
ir3_instruction, instr, unscheduled_list, node) { - bool addr = uses_current_addr(ctx, instr); - bool pred = uses_current_pred(ctx, instr); - - if (addr || pred) { - int ret = trysched(ctx, instr); - - if (ret != DELAYED) - all_delayed = false; - - if (ret == SCHEDULED) - cnt = 0; - else if (ret > 0) - cnt = MIN2(cnt, ret); - if (addr) - addr_in_use = true; - if (pred) - pred_in_use = true; - - attempted++; - } - } - - if (!addr_in_use) - ctx->addr = NULL; - - if (!pred_in_use) - ctx->pred = NULL; - - /* detect if we've gotten ourselves into an impossible situation - * and bail if needed - */ - if (all_delayed && (attempted > 0)) { - if (pred_in_use) { - /* TODO we probably need to keep a list of instructions - * that reference predicate, similar to indirects - */ - ctx->error = true; - return DELAYED; - } - if (addr_in_use) { - struct ir3 *ir = ctx->addr->block->shader; - struct ir3_instruction *new_addr = - ir3_instr_clone(ctx->addr); - unsigned i; - - /* original addr is scheduled, but new one isn't: */ - new_addr->flags &= ~IR3_INSTR_MARK; - - for (i = 0; i < ir->indirects_count; i++) { - struct ir3_instruction *indirect = ir->indirects[i]; - - /* skip instructions already scheduled: */ - if (indirect->flags & IR3_INSTR_MARK) - continue; - - /* remap remaining instructions using current addr - * to new addr: - */ - if (indirect->address == ctx->addr) - indirect->address = new_addr; + if (!is_scheduled(baryf)) { + notes->blocked_kill = true; + return -1; } - - /* all remaining indirects remapped to new addr: */ - ctx->addr = NULL; - - /* not really, but this will trigger us to go back to - * main trysched() loop now that we've resolved the - * conflict by duplicating the instr that writes to - * the address register. 
- */ - return SCHEDULED; } } - return cnt; + if (check_conflict(ctx, notes, instr)) + return -1; + + return 0; } -static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block) +/* move eligible instructions to the priority list: */ +static unsigned +add_eligible_instrs(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, + struct list_head *prio_queue, struct list_head *unscheduled_list) { - struct list_head unscheduled_list; + unsigned min_delay = ~0; + list_for_each_entry_safe (struct ir3_instruction, instr, unscheduled_list, node) { + int e = instr_eligibility(ctx, notes, instr); + if (e < 0) + continue; + min_delay = MIN2(min_delay, e); + if (e == 0) { + /* remove from unscheduled list and into priority queue: */ + list_delinit(&instr->node); + ir3_insert_by_depth(instr, prio_queue); + } + } + + return min_delay; +} + +/* "spill" the address register by remapping any unscheduled + * instructions which depend on the current address register + * to a clone of the instruction which wrote the address reg. 
+ */ +static void +split_addr(struct ir3_sched_ctx *ctx) +{ + struct ir3 *ir = ctx->addr->block->shader; + struct ir3_instruction *new_addr = NULL; + unsigned i; + + debug_assert(ctx->addr); + + for (i = 0; i < ir->indirects_count; i++) { + struct ir3_instruction *indirect = ir->indirects[i]; + + /* skip instructions already scheduled: */ + if (indirect->flags & IR3_INSTR_MARK) + continue; + + /* remap remaining instructions using current addr + * to new addr: + */ + if (indirect->address == ctx->addr) { + if (!new_addr) { + new_addr = ir3_instr_clone(ctx->addr); + /* original addr is scheduled, but new one isn't: */ + new_addr->flags &= ~IR3_INSTR_MARK; + } + indirect->address = new_addr; + } + } + + /* all remaining indirects remapped to new addr: */ + ctx->addr = NULL; +} + +/* "spill" the predicate register by remapping any unscheduled + * instructions which depend on the current predicate register + * to a clone of the instruction which wrote the address reg. + */ +static void +split_pred(struct ir3_sched_ctx *ctx) +{ + struct ir3 *ir = ctx->pred->block->shader; + struct ir3_instruction *new_pred = NULL; + unsigned i; + + debug_assert(ctx->pred); + + for (i = 0; i < ir->predicates_count; i++) { + struct ir3_instruction *predicated = ir->predicates[i]; + + /* skip instructions already scheduled: */ + if (predicated->flags & IR3_INSTR_MARK) + continue; + + /* remap remaining instructions using current pred + * to new pred: + * + * TODO is there ever a case when pred isn't first + * (and only) src? 
+ */ + if (ssa(predicated->regs[1]) == ctx->pred) { + if (!new_pred) { + new_pred = ir3_instr_clone(ctx->pred); + /* original pred is scheduled, but new one isn't: */ + new_pred->flags &= ~IR3_INSTR_MARK; + } + predicated->regs[1]->instr = new_pred; + } + } + + /* all remaining predicated remapped to new pred: */ + ctx->pred = NULL; +} + +static void +sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) +{ + struct list_head unscheduled_list, prio_queue; + + ctx->block = block; + + /* move all instructions to the unscheduled list, and + * empty the block's instruction list (to which we will + * be inserting. + */ list_replace(&block->instr_list, &unscheduled_list); list_inithead(&block->instr_list); + list_inithead(&prio_queue); - /* schedule all the shader input's (meta-instr) first so that - * the RA step sees that the input registers contain a value - * from the start of the shader: + /* first a pre-pass to schedule all meta:input/phi instructions + * (which need to appear first so that RA knows the register is + * occupied: */ - if (!block->parent) { - unsigned i; - for (i = 0; i < block->ninputs; i++) { - struct ir3_instruction *in = block->inputs[i]; - if (in) - schedule(ctx, in, true); - } + list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { + if (is_meta(instr) && ((instr->opc == OPC_META_INPUT) || + (instr->opc == OPC_META_PHI))) + schedule(ctx, instr); } - list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { - int cnt = trysched(ctx, instr); + while (!(list_empty(&unscheduled_list) && + list_empty(&prio_queue))) { + struct ir3_sched_notes notes = {0}; + unsigned delay; - if (cnt == DELAYED) - cnt = block_sched_undelayed(ctx, &unscheduled_list); + delay = add_eligible_instrs(ctx, ¬es, &prio_queue, &unscheduled_list); - /* -1 is signal to return up stack, but to us means same as 0: */ - cnt = MAX2(0, cnt); - cnt += ctx->cnt; + if (!list_empty(&prio_queue)) { + struct ir3_instruction 
*instr = list_last_entry(&prio_queue, + struct ir3_instruction, node); + /* ugg, this is a bit ugly, but between the time when + * the instruction became eligible and now, a new + * conflict may have arose.. + */ + if (check_conflict(ctx, ¬es, instr)) { + list_del(&instr->node); + list_addtail(&instr->node, &unscheduled_list); + continue; + } - /* if deepest remaining instruction cannot be scheduled, try - * the increasingly more shallow instructions until needed - * number of delay slots is filled: - */ - list_for_each_entry_safe (struct ir3_instruction, instr, &instr->node, node) - trysched(ctx, instr); - - /* and if we run out of instructions that can be scheduled, - * then it is time for nop's: - */ - while (cnt > ctx->cnt) - schedule(ctx, ir3_NOP(block), false); + schedule(ctx, instr); + } else if (delay == ~0) { + /* nothing available to schedule.. if we are blocked on + * address/predicate register conflict, then break the + * deadlock by cloning the instruction that wrote that + * reg: + */ + if (notes.addr_conflict) { + split_addr(ctx); + } else if (notes.pred_conflict) { + split_pred(ctx); + } else { + debug_assert(0); + ctx->error = true; + return; + } + } else { + /* and if we run out of instructions that can be scheduled, + * then it is time for nop's: + */ + debug_assert(delay <= 6); + while (delay > 0) { + ir3_NOP(block); + delay--; + } + } } } @@ -420,7 +428,7 @@ int ir3_block_sched(struct ir3_block *block) { struct ir3_sched_ctx ctx = {0}; ir3_clear_mark(block->shader); - block_sched(&ctx, block); + sched_block(&ctx, block); if (ctx.error) return -1; return 0; From 0f6faa8ff317634ffb75e6040f2de2019dd80d13 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 25 Apr 2015 10:22:49 -0400 Subject: [PATCH 758/834] freedreno/ir3: remove tgsi f/e Also remove ir3_flatten which was only used by tgsi f/e. 
Signed-off-by: Rob Clark --- .../drivers/freedreno/Makefile.sources | 2 - .../drivers/freedreno/freedreno_screen.c | 1 - .../drivers/freedreno/freedreno_util.h | 1 - .../drivers/freedreno/ir3/instr-a3xx.h | 7 +- src/gallium/drivers/freedreno/ir3/ir3.h | 1 - .../drivers/freedreno/ir3/ir3_cmdline.c | 32 +- .../drivers/freedreno/ir3/ir3_compiler.c | 3709 ----------------- .../drivers/freedreno/ir3/ir3_compiler.h | 5 - .../drivers/freedreno/ir3/ir3_flatten.c | 152 - src/gallium/drivers/freedreno/ir3/ir3_group.c | 42 +- src/gallium/drivers/freedreno/ir3/ir3_print.c | 2 - .../drivers/freedreno/ir3/ir3_shader.c | 28 +- 12 files changed, 25 insertions(+), 3957 deletions(-) delete mode 100644 src/gallium/drivers/freedreno/ir3/ir3_compiler.c delete mode 100644 src/gallium/drivers/freedreno/ir3/ir3_flatten.c diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 809d1a0f8f5..6af8754c4af 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -120,12 +120,10 @@ ir3_SOURCES := \ ir3/disasm-a3xx.c \ ir3/instr-a3xx.h \ ir3/ir3.c \ - ir3/ir3_compiler.c \ ir3/ir3_compiler_nir.c \ ir3/ir3_compiler.h \ ir3/ir3_cp.c \ ir3/ir3_depth.c \ - ir3/ir3_flatten.c \ ir3/ir3_group.c \ ir3/ir3.h \ ir3/ir3_legalize.c \ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 00b9471095e..c9d82563e1c 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -70,7 +70,6 @@ static const struct debug_named_value debug_options[] = { {"optmsgs", FD_DBG_OPTMSGS,"Enable optimizer debug messages"}, {"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 120 (rather than 130) on a3xx+"}, {"nocp", FD_DBG_NOCP, "Disable copy-propagation"}, - {"nir", FD_DBG_NIR, "Enable experimental NIR compiler"}, DEBUG_NAMED_VALUE_END }; diff --git 
a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index aec09ab6616..8664e74b335 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -65,7 +65,6 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define FD_DBG_OPTMSGS 0x0400 #define FD_DBG_GLSL120 0x1000 #define FD_DBG_NOCP 0x2000 -#define FD_DBG_NIR 0x4000 extern int fd_mesa_debug; extern bool fd_binning_enabled; diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index cffa62b6f34..5ead0c86999 100644 --- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -191,9 +191,9 @@ typedef enum { OPC_LDLV = 31, /* meta instructions (category -1): */ - /* placeholder instr to mark inputs/outputs: */ + /* placeholder instr to mark shader inputs: */ OPC_META_INPUT = 0, - OPC_META_OUTPUT = 1, + OPC_META_PHI = 1, /* The "fan-in" and "fan-out" instructions are used for keeping * track of instructions that write to multiple dst registers * (fan-out) like texture sample instructions, or read multiple @@ -201,9 +201,6 @@ typedef enum { */ OPC_META_FO = 2, OPC_META_FI = 3, - /* branches/flow control */ - OPC_META_FLOW = 4, - OPC_META_PHI = 5, } opc_t; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 030a74fe21a..3c4fd2d46b0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -364,7 +364,6 @@ struct ir3_block { struct ir3_instruction **outputs; /* only a single address register: */ struct ir3_instruction *address; - struct ir3_block *parent; struct list_head instr_list; }; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index e42afeaeb21..fb5c6513bcb 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ 
b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -195,16 +195,6 @@ read_file(const char *filename, void **ptr, size_t *size) return 0; } -static void reset_variant(struct ir3_shader_variant *v, const char *msg) -{ - printf("; %s\n", msg); - v->inputs_count = 0; - v->outputs_count = 0; - v->total_in = 0; - v->has_samp = false; - v->immediates_count = 0; -} - static void print_usage(void) { printf("Usage: ir3_compiler [OPTIONS]... FILE\n"); @@ -231,7 +221,6 @@ int main(int argc, char **argv) const char *info; void *ptr; size_t size; - int use_nir = 0; fd_mesa_debug |= FD_DBG_DISASM; @@ -296,11 +285,6 @@ int main(int argc, char **argv) n++; continue; } - if (!strcmp(argv[n], "--nir")) { - use_nir = true; - n++; - continue; - } if (!strcmp(argv[n], "--help")) { print_usage(); @@ -341,20 +325,8 @@ int main(int argc, char **argv) break; } - if (use_nir) { - info = "NIR compiler"; - ret = ir3_compile_shader_nir(&v, toks, key); - } else { - info = "TGSI compiler"; - ret = ir3_compile_shader(&v, toks, key, true); - } - - if (ret) { - reset_variant(&v, "compiler failed, trying without copy propagation!"); - info = "compiler (no copy propagation)"; - ret = ir3_compile_shader(&v, toks, key, false); - } - + info = "NIR compiler"; + ret = ir3_compile_shader_nir(&v, toks, key); if (ret) { fprintf(stderr, "compiler failed!\n"); return ret; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c deleted file mode 100644 index 25af9f91d3f..00000000000 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ /dev/null @@ -1,3709 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, 
merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include - -#include "pipe/p_state.h" -#include "util/u_string.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "tgsi/tgsi_lowering.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_strings.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_scan.h" - -#include "freedreno_util.h" - -#include "ir3_compiler.h" -#include "ir3_shader.h" - -#include "instr-a3xx.h" -#include "ir3.h" - -struct ir3_compile_context { - const struct tgsi_token *tokens; - bool free_tokens; - struct ir3 *ir; - struct ir3_shader_variant *so; - uint16_t integer_s; - - struct ir3_block *block; - struct ir3_instruction *current_instr; - - /* we need to defer updates to block->outputs[] until the end - * of an instruction (so we don't see new value until *after* - * the src registers are processed) - */ - struct { - struct ir3_instruction *instr, **instrp; - } output_updates[64]; - unsigned num_output_updates; - - /* are we in a sequence of "atomic" instructions? 
- */ - bool atomic; - - /* For fragment shaders, from the hw perspective the only - * actual input is r0.xy position register passed to bary.f. - * But TGSI doesn't know that, it still declares things as - * IN[] registers. So we do all the input tracking normally - * and fix things up after compile_instructions() - * - * NOTE that frag_pos is the hardware position (possibly it - * is actually an index or tag or some such.. it is *not* - * values that can be directly used for gl_FragCoord..) - */ - struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4]; - - /* For vertex shaders, keep track of the system values sources */ - struct ir3_instruction *vertex_id, *basevertex, *instance_id; - - struct tgsi_parse_context parser; - unsigned type; - - struct tgsi_shader_info info; - - /* hmm, would be nice if tgsi_scan_shader figured this out - * for us: - */ - struct { - unsigned first, last; - struct ir3_instruction *fanin; - } array[MAX_ARRAYS]; - uint32_t array_dirty; - /* offset into array[], per file, of first array info */ - uint8_t array_offsets[TGSI_FILE_COUNT]; - - /* for calculating input/output positions/linkages: */ - unsigned next_inloc; - - /* a4xx (at least patchlevel 0) cannot seem to flat-interpolate - * so we need to use ldlv.u32 to load the varying directly: - */ - bool flat_bypass; - - unsigned num_internal_temps; - struct tgsi_src_register internal_temps[8]; - - /* for looking up which system value is which */ - unsigned sysval_semantics[8]; - - /* idx/slot for last compiler generated immediate */ - unsigned immediate_idx; - - /* stack of branch instructions that mark (potentially nested) - * branch if/else/loop/etc - */ - struct { - struct ir3_instruction *instr, *cond; - bool inv; /* true iff in else leg of branch */ - } branch[16]; - unsigned int branch_count; - - /* list of kill instructions: */ - struct ir3_instruction *kill[16]; - unsigned int kill_count; - - /* used when dst is same as one of the src, to avoid overwriting a - * src element 
before the remaining scalar instructions that make - * up the vector operation - */ - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - - /* just for catching incorrect use of get_dst()/put_dst(): - */ - bool using_tmp_dst; -}; - - -static void vectorize(struct ir3_compile_context *ctx, - struct ir3_instruction *instr, struct tgsi_dst_register *dst, - int nsrcs, ...); -static void create_mov(struct ir3_compile_context *ctx, - struct tgsi_dst_register *dst, struct tgsi_src_register *src); -static type_t get_ftype(struct ir3_compile_context *ctx); -static type_t get_utype(struct ir3_compile_context *ctx); - -static unsigned setup_arrays(struct ir3_compile_context *ctx, unsigned file, unsigned i) -{ - /* ArrayID 0 for a given file is the legacy array spanning the entire file: */ - ctx->array[i].first = 0; - ctx->array[i].last = ctx->info.file_max[file]; - ctx->array_offsets[file] = i; - i += ctx->info.array_max[file] + 1; - return i; -} - -static unsigned -compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, - const struct tgsi_token *tokens) -{ - unsigned ret, i; - struct tgsi_shader_info *info = &ctx->info; - struct tgsi_lowering_config lconfig = { - .color_two_side = so->key.color_two_side, - .lower_DST = true, - .lower_XPD = true, - .lower_SCS = true, - .lower_LRP = true, - .lower_FRC = true, - .lower_POW = true, - .lower_LIT = true, - .lower_EXP = true, - .lower_LOG = true, - .lower_DP4 = true, - .lower_DP3 = true, - .lower_DPH = true, - .lower_DP2 = true, - .lower_DP2A = true, - }; - - switch (so->type) { - case SHADER_FRAGMENT: - case SHADER_COMPUTE: - lconfig.saturate_s = so->key.fsaturate_s; - lconfig.saturate_t = so->key.fsaturate_t; - lconfig.saturate_r = so->key.fsaturate_r; - ctx->integer_s = so->key.finteger_s; - break; - case SHADER_VERTEX: - lconfig.saturate_s = so->key.vsaturate_s; - lconfig.saturate_t = so->key.vsaturate_t; - lconfig.saturate_r = so->key.vsaturate_r; - ctx->integer_s = 
so->key.vinteger_s; - break; - } - - if (!so->shader) { - /* hack for standalone compiler which does not have - * screen/context: - */ - } else if (ir3_shader_gpuid(so->shader) >= 400) { - /* a4xx seems to have *no* sam.p */ - lconfig.lower_TXP = ~0; /* lower all txp */ - /* need special handling for "flat" */ - ctx->flat_bypass = true; - } else { - /* a3xx just needs to avoid sam.p for 3d tex */ - lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D); - /* no special handling for "flat" */ - ctx->flat_bypass = false; - } - - ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info); - ctx->free_tokens = !!ctx->tokens; - if (!ctx->tokens) { - /* no lowering */ - ctx->tokens = tokens; - } - ctx->ir = so->ir; - ctx->so = so; - ctx->array_dirty = 0; - ctx->next_inloc = 8; - ctx->num_internal_temps = 0; - ctx->branch_count = 0; - ctx->kill_count = 0; - ctx->block = NULL; - ctx->current_instr = NULL; - ctx->num_output_updates = 0; - ctx->atomic = false; - ctx->frag_pos = NULL; - ctx->frag_face = NULL; - ctx->vertex_id = NULL; - ctx->instance_id = NULL; - ctx->tmp_src = NULL; - ctx->using_tmp_dst = false; - - memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord)); - memset(ctx->array, 0, sizeof(ctx->array)); - memset(ctx->array_offsets, 0, sizeof(ctx->array_offsets)); - -#define FM(x) (1 << TGSI_FILE_##x) - /* NOTE: if relative addressing is used, we set constlen in - * the compiler (to worst-case value) since we don't know in - * the assembler what the max addr reg value can be: - */ - if (info->indirect_files & FM(CONSTANT)) - so->constlen = MIN2(255, ctx->info.const_file_max[0] + 1); - - i = 0; - i += setup_arrays(ctx, TGSI_FILE_INPUT, i); - i += setup_arrays(ctx, TGSI_FILE_TEMPORARY, i); - i += setup_arrays(ctx, TGSI_FILE_OUTPUT, i); - /* any others? 
we don't track arrays for const..*/ - - /* Immediates go after constants: */ - so->first_immediate = so->first_driver_param = - info->const_file_max[0] + 1; - /* 1 unit for the vertex id base */ - if (so->type == SHADER_VERTEX) - so->first_immediate++; - /* 4 (vec4) units for ubo base addresses */ - so->first_immediate += 4; - ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); - - ret = tgsi_parse_init(&ctx->parser, ctx->tokens); - if (ret != TGSI_PARSE_OK) - return ret; - - ctx->type = ctx->parser.FullHeader.Processor.Processor; - - return ret; -} - -static void -compile_error(struct ir3_compile_context *ctx, const char *format, ...) -{ - va_list ap; - va_start(ap, format); - _debug_vprintf(format, ap); - va_end(ap); - tgsi_dump(ctx->tokens, 0); - debug_assert(0); -} - -#define compile_assert(ctx, cond) do { \ - if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \ - } while (0) - -static void -compile_free(struct ir3_compile_context *ctx) -{ - if (ctx->free_tokens) - free((void *)ctx->tokens); - tgsi_parse_free(&ctx->parser); -} - -struct instr_translater { - void (*fxn)(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst); - unsigned tgsi_opc; - opc_t opc; - opc_t hopc; /* opc to use for half_precision mode, if different */ - unsigned arg; -}; - -static void -instr_finish(struct ir3_compile_context *ctx) -{ - unsigned i; - - if (ctx->atomic) - return; - - for (i = 0; i < ctx->num_output_updates; i++) - *(ctx->output_updates[i].instrp) = ctx->output_updates[i].instr; - - ctx->num_output_updates = 0; - - while (ctx->array_dirty) { - unsigned aid = ffs(ctx->array_dirty) - 1; - ctx->array[aid].fanin = NULL; - ctx->array_dirty &= ~(1 << aid); - } -} - -/* For "atomic" groups of instructions, for example the four scalar - * instructions to perform a vec4 operation. 
Basically this just - * blocks out handling of output_updates so the next scalar instruction - * still sees the result from before the start of the atomic group. - * - * NOTE: when used properly, this could probably replace get/put_dst() - * stuff. - */ -static void -instr_atomic_start(struct ir3_compile_context *ctx) -{ - ctx->atomic = true; -} - -static void -instr_atomic_end(struct ir3_compile_context *ctx) -{ - ctx->atomic = false; - instr_finish(ctx); -} - -static struct ir3_instruction * -instr_create(struct ir3_compile_context *ctx, int category, opc_t opc) -{ - instr_finish(ctx); - return (ctx->current_instr = ir3_instr_create(ctx->block, category, opc)); -} - -static struct ir3_block * -push_block(struct ir3_compile_context *ctx) -{ - struct ir3_block *block; - unsigned ntmp, nin, nout; - -#define SCALAR_REGS(file) (4 * (ctx->info.file_max[TGSI_FILE_ ## file] + 1)) - - /* hmm, give ourselves room to create 8 extra temporaries (vec4): - */ - ntmp = SCALAR_REGS(TEMPORARY); - ntmp += 8 * 4; - - nout = SCALAR_REGS(OUTPUT); - nin = SCALAR_REGS(INPUT) + SCALAR_REGS(SYSTEM_VALUE); - - /* for outermost block, 'inputs' are the actual shader INPUT - * register file. Reads from INPUT registers always go back to - * top block. For nested blocks, 'inputs' is used to track any - * TEMPORARY file register from one of the enclosing blocks that - * is ready in this block. 
- */ - if (!ctx->block) { - /* NOTE: fragment shaders actually have two inputs (r0.xy, the - * position) - */ - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - int n = 2; - if (ctx->info.reads_position) - n += 4; - if (ctx->info.uses_frontface) - n += 4; - nin = MAX2(n, nin); - nout += ARRAY_SIZE(ctx->kill); - } - } else { - nin = ntmp; - } - - block = ir3_block_create(ctx->ir, ntmp, nin, nout); - - if ((ctx->type == TGSI_PROCESSOR_FRAGMENT) && !ctx->block) - block->noutputs -= ARRAY_SIZE(ctx->kill); - - block->parent = ctx->block; - ctx->block = block; - - return block; -} - -static void -pop_block(struct ir3_compile_context *ctx) -{ - ctx->block = ctx->block->parent; - compile_assert(ctx, ctx->block); -} - -static struct ir3_instruction * -create_output(struct ir3_block *block, struct ir3_instruction *instr, - unsigned n) -{ - struct ir3_instruction *out; - - out = ir3_instr_create(block, -1, OPC_META_OUTPUT); - out->inout.block = block; - ir3_reg_create(out, n, 0); - if (instr) - ir3_reg_create(out, 0, IR3_REG_SSA)->instr = instr; - - return out; -} - -static struct ir3_instruction * -create_input(struct ir3_block *block, struct ir3_instruction *instr, - unsigned n) -{ - struct ir3_instruction *in; - - in = ir3_instr_create(block, -1, OPC_META_INPUT); - in->inout.block = block; - ir3_reg_create(in, n, 0); - if (instr) - ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr; - - return in; -} - -static struct ir3_instruction * -block_input(struct ir3_block *block, unsigned n) -{ - /* references to INPUT register file always go back up to - * top level: - */ - if (block->parent) - return block_input(block->parent, n); - return block->inputs[n]; -} - -/* return temporary in scope, creating if needed meta-input node - * to track block inputs - */ -static struct ir3_instruction * -block_temporary(struct ir3_block *block, unsigned n) -{ - /* references to TEMPORARY register file, find the nearest - * enclosing block which has already assigned this temporary, - * creating 
meta-input instructions along the way to keep - * track of block inputs - */ - if (block->parent && !block->temporaries[n]) { - /* if already have input for this block, reuse: */ - if (!block->inputs[n]) - block->inputs[n] = block_temporary(block->parent, n); - - /* and create new input to return: */ - return create_input(block, block->inputs[n], n); - } - return block->temporaries[n]; -} - -static struct ir3_instruction * -create_immed(struct ir3_compile_context *ctx, float val) -{ - /* NOTE: *don't* use instr_create() here! - */ - struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->block, 1, 0); - instr->cat1.src_type = get_ftype(ctx); - instr->cat1.dst_type = get_ftype(ctx); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = val; - return instr; -} - -static void -ssa_instr_set(struct ir3_compile_context *ctx, unsigned file, unsigned n, - struct ir3_instruction *instr) -{ - struct ir3_block *block = ctx->block; - unsigned idx = ctx->num_output_updates; - - compile_assert(ctx, idx < ARRAY_SIZE(ctx->output_updates)); - - /* NOTE: defer update of temporaries[idx] or output[idx] - * until instr_finish(), so that if the current instruction - * reads the same TEMP/OUT[] it gets the old value: - * - * bleh.. this might be a bit easier to just figure out - * in instr_finish(). But at that point we've already - * lost information about OUTPUT vs TEMPORARY register - * file.. 
- */ - - switch (file) { - case TGSI_FILE_OUTPUT: - compile_assert(ctx, n < block->noutputs); - ctx->output_updates[idx].instrp = &block->outputs[n]; - ctx->output_updates[idx].instr = instr; - ctx->num_output_updates++; - break; - case TGSI_FILE_TEMPORARY: - compile_assert(ctx, n < block->ntemporaries); - ctx->output_updates[idx].instrp = &block->temporaries[n]; - ctx->output_updates[idx].instr = instr; - ctx->num_output_updates++; - break; - case TGSI_FILE_ADDRESS: - compile_assert(ctx, n < 1); - ctx->output_updates[idx].instrp = &block->address; - ctx->output_updates[idx].instr = instr; - ctx->num_output_updates++; - break; - } -} - -static struct ir3_instruction * -ssa_instr_get(struct ir3_compile_context *ctx, unsigned file, unsigned n) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *instr = NULL; - - switch (file) { - case TGSI_FILE_INPUT: - instr = block_input(ctx->block, n); - break; - case TGSI_FILE_OUTPUT: - /* really this should just happen in case of 'MOV_SAT OUT[n], ..', - * for the following clamp instructions: - */ - instr = block->outputs[n]; - /* we don't have to worry about read from an OUTPUT that was - * assigned outside of the current block, because the _SAT - * clamp instructions will always be in the same block as - * the original instruction which wrote the OUTPUT - */ - compile_assert(ctx, instr); - break; - case TGSI_FILE_TEMPORARY: - instr = block_temporary(ctx->block, n); - if (!instr) { - /* this can happen when registers (or components of a TGSI - * register) are used as src before they have been assigned - * (undefined contents). To avoid confusing the rest of the - * compiler, and to generally keep things peachy, substitute - * an instruction that sets the src to 0.0. Or to keep - * things undefined, I could plug in a random number? :-P - * - * NOTE: *don't* use instr_create() here! 
- */ - instr = create_immed(ctx, 0.0); - /* no need to recreate the immed for every access: */ - block->temporaries[n] = instr; - } - break; - case TGSI_FILE_SYSTEM_VALUE: - switch (ctx->sysval_semantics[n >> 2]) { - case TGSI_SEMANTIC_VERTEXID_NOBASE: - instr = ctx->vertex_id; - break; - case TGSI_SEMANTIC_BASEVERTEX: - instr = ctx->basevertex; - break; - case TGSI_SEMANTIC_INSTANCEID: - instr = ctx->instance_id; - break; - } - break; - } - - return instr; -} - -static int dst_array_id(struct ir3_compile_context *ctx, - const struct tgsi_dst_register *dst) -{ - // XXX complete hack to recover tgsi_full_dst_register... - // nothing that isn't wrapped in a tgsi_full_dst_register - // should be indirect - const struct tgsi_full_dst_register *fdst = (const void *)dst; - return fdst->Indirect.ArrayID + ctx->array_offsets[dst->File]; -} - -static int src_array_id(struct ir3_compile_context *ctx, - const struct tgsi_src_register *src) -{ - // XXX complete hack to recover tgsi_full_src_register... 
- // nothing that isn't wrapped in a tgsi_full_src_register - // should be indirect - const struct tgsi_full_src_register *fsrc = (const void *)src; - debug_assert(src->File != TGSI_FILE_CONSTANT); - return fsrc->Indirect.ArrayID + ctx->array_offsets[src->File]; -} - -static struct ir3_instruction * -array_fanin(struct ir3_compile_context *ctx, unsigned aid, unsigned file) -{ - struct ir3_instruction *instr; - - if (ctx->array[aid].fanin) { - instr = ctx->array[aid].fanin; - } else { - unsigned first = ctx->array[aid].first; - unsigned last = ctx->array[aid].last; - unsigned i, j; - - instr = ir3_instr_create2(ctx->block, -1, OPC_META_FI, - 1 + (4 * (last + 1 - first))); - ir3_reg_create(instr, 0, 0); - for (i = first; i <= last; i++) { - for (j = 0; j < 4; j++) { - unsigned n = regid(i, j); - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = - ssa_instr_get(ctx, file, n); - } - } - ctx->array[aid].fanin = instr; - ctx->array_dirty |= (1 << aid); - } - - return instr; -} - -static void -ssa_dst(struct ir3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_dst_register *dst, unsigned chan) -{ - if (dst->Indirect) { - struct ir3_register *reg = instr->regs[0]; - unsigned i, aid = dst_array_id(ctx, dst); - unsigned first = ctx->array[aid].first; - unsigned last = ctx->array[aid].last; - unsigned off = dst->Index - first; /* vec4 offset */ - - reg->size = 4 * (1 + last - first); - reg->offset = regid(off, chan); - - instr->fanin = array_fanin(ctx, aid, dst->File); - - /* annotate with the array-id, to help out the register- - * assignment stage. At least for the case of indirect - * writes, we should capture enough dependencies to - * preserve the order of reads/writes of the array, so - * the multiple "names" for the array should end up all - * assigned to the same registers. - */ - instr->fanin->fi.aid = aid; - - /* Since we are scalarizing vec4 tgsi instructions/regs, we - * run into a slight complication here. 
To do the naive thing - * and setup a fanout for each scalar array element would end - * up with the result that the instructions generated for each - * component of the vec4 would end up clobbering each other. - * So we take advantage here of knowing that the array index - * (after the shl.b) will be a multiple of four, and only set - * every fourth scalar component in the array. See also - * fixup_ssa_dst_array() - */ - for (i = first; i <= last; i++) { - struct ir3_instruction *split; - unsigned n = regid(i, chan); - int off = (4 * (i - first)) + chan; - - if (is_meta(instr) && (instr->opc == OPC_META_FO)) - off -= instr->fo.off; - - split = ir3_instr_create(ctx->block, -1, OPC_META_FO); - split->fo.off = off; - ir3_reg_create(split, 0, 0); - ir3_reg_create(split, 0, IR3_REG_SSA)->instr = instr; - - ssa_instr_set(ctx, dst->File, n, split); - } - } else { - /* normal case (not relative addressed GPR) */ - ssa_instr_set(ctx, dst->File, regid(dst->Index, chan), instr); - } -} - -static void -ssa_src(struct ir3_compile_context *ctx, struct ir3_register *reg, - const struct tgsi_src_register *src, unsigned chan) -{ - struct ir3_instruction *instr; - - if (src->Indirect && (src->File != TGSI_FILE_CONSTANT)) { - /* for relative addressing of gpr's (due to register assignment) - * we must generate a fanin instruction to collect all possible - * array elements that the instruction could address together: - */ - unsigned aid = src_array_id(ctx, src); - unsigned first = ctx->array[aid].first; - unsigned last = ctx->array[aid].last; - unsigned off = src->Index - first; /* vec4 offset */ - - reg->size = 4 * (1 + last - first); - reg->offset = regid(off, chan); - - instr = array_fanin(ctx, aid, src->File); - } else if (src->File == TGSI_FILE_CONSTANT && src->Dimension) { - const struct tgsi_full_src_register *fsrc = (const void *)src; - struct ir3_instruction *temp = NULL; - int ubo_regid = regid(ctx->so->first_driver_param, 0) + - fsrc->Dimension.Index - 1; - int offset = 0; 
- - /* We don't handle indirect UBO array accesses... yet. */ - compile_assert(ctx, !fsrc->Dimension.Indirect); - /* UBOs start at index 1. */ - compile_assert(ctx, fsrc->Dimension.Index > 0); - - if (src->Indirect) { - /* In case of an indirect index, it will have been loaded into an - * address register. There will be a sequence of - * - * shl.b x, val, 2 - * mova a0, x - * - * We rely on this sequence to get the original val out and shift - * it by 4, since we're dealing in vec4 units. - */ - compile_assert(ctx, ctx->block->address); - compile_assert(ctx, ctx->block->address->regs[1]->instr->opc == - OPC_SHL_B); - - temp = instr = instr_create(ctx, 2, OPC_SHL_B); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_HALF | IR3_REG_SSA)->instr = - ctx->block->address->regs[1]->instr->regs[1]->instr; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4; - } else if (src->Index >= 64) { - /* Otherwise it's a plain index (in vec4 units). Move it into a - * register. - */ - temp = instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_utype(ctx); - instr->cat1.dst_type = get_utype(ctx); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = src->Index * 16; - } else { - /* The offset is small enough to fit into the ldg instruction - * directly. - */ - offset = src->Index * 16; - } - - if (temp) { - /* If there was an offset (most common), add it to the buffer - * address. 
- */ - instr = instr_create(ctx, 2, OPC_ADD_S); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp; - ir3_reg_create(instr, ubo_regid, IR3_REG_CONST); - } else { - /* Otherwise just load the buffer address directly */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_utype(ctx); - instr->cat1.dst_type = get_utype(ctx); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, ubo_regid, IR3_REG_CONST); - } - - temp = instr; - - instr = instr_create(ctx, 6, OPC_LDG); - instr->cat6.type = TYPE_U32; - instr->cat6.offset = offset + chan * 4; - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1; - - reg->flags &= ~(IR3_REG_RELATIV | IR3_REG_CONST); - } else { - /* normal case (not relative addressed GPR) */ - instr = ssa_instr_get(ctx, src->File, regid(src->Index, chan)); - } - - if (instr) { - reg->flags |= IR3_REG_SSA; - reg->instr = instr; - } else if (reg->flags & IR3_REG_SSA) { - /* special hack for trans_samp() which calls ssa_src() directly - * to build up the collect (fanin) for const src.. (so SSA flag - * set but no src instr... it basically gets lucky because we - * default to 0.0 for "undefined" src instructions, which is - * what it wants. 
We probably need to give it a better way to - * do this, but for now this hack: - */ - reg->instr = create_immed(ctx, 0.0); - } -} - -static struct ir3_register * -add_dst_reg_wrmask(struct ir3_compile_context *ctx, - struct ir3_instruction *instr, const struct tgsi_dst_register *dst, - unsigned chan, unsigned wrmask) -{ - unsigned flags = 0, num = 0; - struct ir3_register *reg; - - switch (dst->File) { - case TGSI_FILE_OUTPUT: - case TGSI_FILE_TEMPORARY: - /* uses SSA */ - break; - case TGSI_FILE_ADDRESS: - flags |= IR3_REG_ADDR; - /* uses SSA */ - break; - default: - compile_error(ctx, "unsupported dst register file: %s\n", - tgsi_file_name(dst->File)); - break; - } - - if (dst->Indirect) { - flags |= IR3_REG_RELATIV; - - /* shouldn't happen, and we can't cope with it below: */ - compile_assert(ctx, wrmask == 0x1); - - compile_assert(ctx, ctx->block->address); - if (instr->address) - compile_assert(ctx, ctx->block->address == instr->address); - - instr->address = ctx->block->address; - array_insert(ctx->ir->indirects, instr); - } - - reg = ir3_reg_create(instr, regid(num, chan), flags); - reg->wrmask = wrmask; - - if (wrmask == 0x1) { - /* normal case */ - ssa_dst(ctx, instr, dst, chan); - } else if ((dst->File == TGSI_FILE_TEMPORARY) || - (dst->File == TGSI_FILE_OUTPUT) || - (dst->File == TGSI_FILE_ADDRESS)) { - struct ir3_instruction *prev = NULL; - unsigned i; - - compile_assert(ctx, !dst->Indirect); - - /* if instruction writes multiple, we need to create - * some place-holder collect the registers: - */ - for (i = 0; i < 4; i++) { - /* NOTE: slightly ugly that we setup neighbor ptrs - * for FO here, but handle FI in CP pass.. we should - * probably just always setup neighbor ptrs in the - * frontend? 
- */ - struct ir3_instruction *split = - ir3_instr_create(ctx->block, -1, OPC_META_FO); - split->fo.off = i; - /* unused dst reg: */ - /* NOTE: set SSA flag on dst here, because unused FO's - * which don't get scheduled will end up not in the - * instruction list when RA sets SSA flag on each dst. - * Slight hack. We really should set SSA flag on - * every dst register in the frontend. - */ - ir3_reg_create(split, 0, IR3_REG_SSA); - /* and src reg used to hold original instr */ - ir3_reg_create(split, 0, IR3_REG_SSA)->instr = instr; - if (prev) { - split->cp.left = prev; - split->cp.left_cnt++; - prev->cp.right = split; - prev->cp.right_cnt++; - } - if ((wrmask & (1 << i)) && !ctx->atomic) - ssa_dst(ctx, split, dst, chan+i); - prev = split; - } - } - - return reg; -} - -static struct ir3_register * -add_dst_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_dst_register *dst, unsigned chan) -{ - return add_dst_reg_wrmask(ctx, instr, dst, chan, 0x1); -} - -static struct ir3_register * -add_src_reg_wrmask(struct ir3_compile_context *ctx, - struct ir3_instruction *instr, const struct tgsi_src_register *src, - unsigned chan, unsigned wrmask) -{ - unsigned flags = 0, num = 0; - struct ir3_register *reg; - - switch (src->File) { - case TGSI_FILE_IMMEDIATE: - /* TODO if possible, use actual immediate instead of const.. but - * TGSI has vec4 immediates, we can only embed scalar (of limited - * size, depending on instruction..) 
- */ - flags |= IR3_REG_CONST; - num = src->Index + ctx->so->first_immediate; - break; - case TGSI_FILE_CONSTANT: - flags |= IR3_REG_CONST; - num = src->Index; - break; - case TGSI_FILE_OUTPUT: - /* NOTE: we should only end up w/ OUTPUT file for things like - * clamp()'ing saturated dst instructions - */ - case TGSI_FILE_INPUT: - case TGSI_FILE_TEMPORARY: - case TGSI_FILE_SYSTEM_VALUE: - /* uses SSA */ - break; - default: - compile_error(ctx, "unsupported src register file: %s\n", - tgsi_file_name(src->File)); - break; - } - - /* We seem to have 8 bits (6.2) for dst register always, so I think - * it is safe to assume GPR cannot be >=64 - * - * cat3 instructions only have 8 bits for src2, but cannot take a - * const for src2 - * - * cat5 and cat6 in some cases only has 8 bits, but cannot take a - * const for any src. - * - * Other than that we seem to have 12 bits to encode const src, - * except for cat1 which may only have 11 bits (but that seems like - * a bug) - */ - if (flags & IR3_REG_CONST) - compile_assert(ctx, src->Index < (1 << 9)); - else - compile_assert(ctx, src->Index < (1 << 6)); - - /* NOTE: abs/neg modifiers in tgsi only apply to float */ - if (src->Absolute) - flags |= IR3_REG_FABS; - if (src->Negate) - flags |= IR3_REG_FNEG; - - if (src->Indirect) { - flags |= IR3_REG_RELATIV; - - /* shouldn't happen, and we can't cope with it below: */ - compile_assert(ctx, wrmask == 0x1); - - compile_assert(ctx, ctx->block->address); - if (instr->address) - compile_assert(ctx, ctx->block->address == instr->address); - - instr->address = ctx->block->address; - array_insert(ctx->ir->indirects, instr); - } - - reg = ir3_reg_create(instr, regid(num, chan), flags); - reg->wrmask = wrmask; - - if (wrmask == 0x1) { - /* normal case */ - ssa_src(ctx, reg, src, chan); - } else if ((src->File == TGSI_FILE_TEMPORARY) || - (src->File == TGSI_FILE_OUTPUT) || - (src->File == TGSI_FILE_INPUT)) { - struct ir3_instruction *collect; - unsigned i; - - compile_assert(ctx, 
!src->Indirect); - - /* if instruction reads multiple, we need to create - * some place-holder collect the registers: - */ - collect = ir3_instr_create(ctx->block, -1, OPC_META_FI); - ir3_reg_create(collect, 0, 0); /* unused dst reg */ - - for (i = 0; i < 4; i++) { - if (wrmask & (1 << i)) { - /* and src reg used point to the original instr */ - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), - src, chan + i); - } else if (wrmask & ~((i << i) - 1)) { - /* if any remaining components, then dummy - * placeholder src reg to fill in the blanks: - */ - ir3_reg_create(collect, 0, 0); - } - } - - reg->flags |= IR3_REG_SSA; - reg->instr = collect; - } - - return reg; -} - -static struct ir3_register * -add_src_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_src_register *src, unsigned chan) -{ - return add_src_reg_wrmask(ctx, instr, src, chan, 0x1); -} - -static void -src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) -{ - src->File = dst->File; - src->Indirect = dst->Indirect; - src->Dimension = dst->Dimension; - src->Index = dst->Index; - src->Absolute = 0; - src->Negate = 0; - src->SwizzleX = TGSI_SWIZZLE_X; - src->SwizzleY = TGSI_SWIZZLE_Y; - src->SwizzleZ = TGSI_SWIZZLE_Z; - src->SwizzleW = TGSI_SWIZZLE_W; -} - -/* Get internal-temp src/dst to use for a sequence of instructions - * generated by a single TGSI op. 
- */ -static struct tgsi_src_register * -get_internal_temp(struct ir3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst) -{ - struct tgsi_src_register *tmp_src; - int n; - - tmp_dst->File = TGSI_FILE_TEMPORARY; - tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; - tmp_dst->Indirect = 0; - tmp_dst->Dimension = 0; - - /* assign next temporary: */ - n = ctx->num_internal_temps++; - compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); - tmp_src = &ctx->internal_temps[n]; - - tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; - - src_from_dst(tmp_src, tmp_dst); - - return tmp_src; -} - -static inline bool -is_const(struct tgsi_src_register *src) -{ - return (src->File == TGSI_FILE_CONSTANT) || - (src->File == TGSI_FILE_IMMEDIATE); -} - -static inline bool -is_relative(struct tgsi_src_register *src) -{ - return src->Indirect; -} - -static inline bool -is_rel_or_const(struct tgsi_src_register *src) -{ - return is_relative(src) || is_const(src); -} - -static type_t -get_ftype(struct ir3_compile_context *ctx) -{ - return TYPE_F32; -} - -static type_t -get_utype(struct ir3_compile_context *ctx) -{ - return TYPE_U32; -} - -static type_t -get_stype(struct ir3_compile_context *ctx) -{ - return TYPE_S32; -} - -static unsigned -src_swiz(struct tgsi_src_register *src, int chan) -{ - switch (chan) { - case 0: return src->SwizzleX; - case 1: return src->SwizzleY; - case 2: return src->SwizzleZ; - case 3: return src->SwizzleW; - } - assert(0); - return 0; -} - -/* for instructions that cannot take a const register as src, if needed - * generate a move to temporary gpr: - */ -static struct tgsi_src_register * -get_unconst(struct ir3_compile_context *ctx, struct tgsi_src_register *src) -{ - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - - compile_assert(ctx, is_rel_or_const(src)); - - tmp_src = get_internal_temp(ctx, &tmp_dst); - - create_mov(ctx, &tmp_dst, src); - - return tmp_src; -} - -static void -get_immediate(struct 
ir3_compile_context *ctx, - struct tgsi_src_register *reg, uint32_t val) -{ - unsigned neg, swiz, idx, i; - /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ - static const unsigned swiz2tgsi[] = { - TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, - }; - - for (i = 0; i < ctx->immediate_idx; i++) { - swiz = i % 4; - idx = i / 4; - - if (ctx->so->immediates[idx].val[swiz] == val) { - neg = 0; - break; - } - - if (ctx->so->immediates[idx].val[swiz] == -val) { - neg = 1; - break; - } - } - - if (i == ctx->immediate_idx) { - /* need to generate a new immediate: */ - swiz = i % 4; - idx = i / 4; - neg = 0; - ctx->so->immediates[idx].val[swiz] = val; - ctx->so->immediates_count = idx + 1; - ctx->immediate_idx++; - } - - reg->File = TGSI_FILE_IMMEDIATE; - reg->Indirect = 0; - reg->Dimension = 0; - reg->Index = idx; - reg->Absolute = 0; - reg->Negate = neg; - reg->SwizzleX = swiz2tgsi[swiz]; - reg->SwizzleY = swiz2tgsi[swiz]; - reg->SwizzleZ = swiz2tgsi[swiz]; - reg->SwizzleW = swiz2tgsi[swiz]; -} - -static void -create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst, - struct tgsi_src_register *src) -{ - type_t type_mov = get_ftype(ctx); - unsigned i; - - for (i = 0; i < 4; i++) { - /* move to destination: */ - if (dst->WriteMask & (1 << i)) { - struct ir3_instruction *instr; - - if (src->Absolute || src->Negate) { - /* can't have abs or neg on a mov instr, so use - * absneg.f instead to handle these cases: - */ - instr = instr_create(ctx, 2, OPC_ABSNEG_F); - } else { - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - } - - add_dst_reg(ctx, instr, dst, i); - add_src_reg(ctx, instr, src, src_swiz(src, i)); - } - } -} - -static void -create_clamp(struct ir3_compile_context *ctx, - struct tgsi_dst_register *dst, struct tgsi_src_register *val, - struct tgsi_src_register *minval, struct tgsi_src_register *maxval) -{ - struct ir3_instruction *instr; - - instr = 
instr_create(ctx, 2, OPC_MAX_F); - vectorize(ctx, instr, dst, 2, val, 0, minval, 0); - - instr = instr_create(ctx, 2, OPC_MIN_F); - vectorize(ctx, instr, dst, 2, val, 0, maxval, 0); -} - -static void -create_clamp_imm(struct ir3_compile_context *ctx, - struct tgsi_dst_register *dst, - uint32_t minval, uint32_t maxval) -{ - struct tgsi_src_register minconst, maxconst; - struct tgsi_src_register src; - - src_from_dst(&src, dst); - - get_immediate(ctx, &minconst, minval); - get_immediate(ctx, &maxconst, maxval); - - create_clamp(ctx, dst, &src, &minconst, &maxconst); -} - -static struct tgsi_dst_register * -get_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - unsigned i; - - compile_assert(ctx, !ctx->using_tmp_dst); - ctx->using_tmp_dst = true; - - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - struct tgsi_src_register *src = &inst->Src[i].Register; - if ((src->File == dst->File) && (src->Index == dst->Index)) { - if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) && - (src->SwizzleX == TGSI_SWIZZLE_X) && - (src->SwizzleY == TGSI_SWIZZLE_Y) && - (src->SwizzleZ == TGSI_SWIZZLE_Z) && - (src->SwizzleW == TGSI_SWIZZLE_W)) - continue; - ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst); - ctx->tmp_dst.WriteMask = dst->WriteMask; - dst = &ctx->tmp_dst; - break; - } - } - return dst; -} - -static void -put_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst, - struct tgsi_dst_register *dst) -{ - compile_assert(ctx, ctx->using_tmp_dst); - ctx->using_tmp_dst = false; - - /* if necessary, add mov back into original dst: */ - if (dst != &inst->Dst[0].Register) { - create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src); - } -} - -/* helper to generate the necessary repeat and/or additional instructions - * to turn a scalar instruction into a vector operation: - */ -static void -vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr, - struct 
tgsi_dst_register *dst, int nsrcs, ...) -{ - va_list ap; - int i, j, n = 0; - - instr_atomic_start(ctx); - - for (i = 0; i < 4; i++) { - if (dst->WriteMask & (1 << i)) { - struct ir3_instruction *cur; - - if (n++ == 0) { - cur = instr; - } else { - cur = instr_create(ctx, instr->category, instr->opc); - memcpy(cur->info, instr->info, sizeof(cur->info)); - } - - add_dst_reg(ctx, cur, dst, i); - - va_start(ap, nsrcs); - for (j = 0; j < nsrcs; j++) { - struct tgsi_src_register *src = - va_arg(ap, struct tgsi_src_register *); - unsigned flags = va_arg(ap, unsigned); - struct ir3_register *reg; - if (flags & IR3_REG_IMMED) { - reg = ir3_reg_create(cur, 0, IR3_REG_IMMED); - /* this is an ugly cast.. should have put flags first! */ - reg->iim_val = *(int *)&src; - } else { - reg = add_src_reg(ctx, cur, src, src_swiz(src, i)); - } - reg->flags |= flags & ~(IR3_REG_FNEG | IR3_REG_SNEG); - if (flags & IR3_REG_FNEG) - reg->flags ^= IR3_REG_FNEG; - if (flags & IR3_REG_SNEG) - reg->flags ^= IR3_REG_SNEG; - } - va_end(ap); - } - } - - instr_atomic_end(ctx); -} - -/* - * Handlers for TGSI instructions which do not have a 1:1 mapping to - * native instructions: - */ - -static void -trans_clamp(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct tgsi_src_register *src2 = &inst->Src[2].Register; - - create_clamp(ctx, dst, src0, src1, src2); - - put_dst(ctx, inst, dst); -} - -/* ARL(x) = x, but mova from hrN.x to a0.. 
*/ -static void -trans_arl(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *src = &inst->Src[0].Register; - unsigned chan = src->SwizzleX; - - compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS); - - /* NOTE: we allocate a temporary from a flat register - * namespace (ignoring half vs full). It turns out - * not to really matter since registers get reassigned - * later in ir3_ra which (hopefully!) can deal a bit - * better with mixed half and full precision. - */ - tmp_src = get_internal_temp(ctx, &tmp_dst); - - /* cov.{u,f}{32,16}s16 Rtmp, Rsrc */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = (t->tgsi_opc == TGSI_OPCODE_ARL) ? - get_ftype(ctx) : get_utype(ctx); - instr->cat1.dst_type = TYPE_S16; - add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, src, chan); - - /* shl.b Rtmp, Rtmp, 2 */ - instr = instr_create(ctx, 2, OPC_SHL_B); - add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; - - /* mova a0, Rtmp */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = TYPE_S16; - instr->cat1.dst_type = TYPE_S16; - add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; -} - -/* - * texture fetch/sample instructions: - */ - -struct tex_info { - int8_t order[4]; - int8_t args; - unsigned src_wrmask, flags; -}; - -struct target_info { - uint8_t dims; - uint8_t cube; - uint8_t array; - uint8_t shadow; -}; - -static const struct target_info tex_targets[] = { - [TGSI_TEXTURE_1D] = { 1, 0, 0, 0 }, - [TGSI_TEXTURE_2D] = { 2, 0, 0, 0 }, - [TGSI_TEXTURE_3D] = { 3, 0, 0, 0 }, - 
[TGSI_TEXTURE_CUBE] = { 3, 1, 0, 0 }, - [TGSI_TEXTURE_RECT] = { 2, 0, 0, 0 }, - [TGSI_TEXTURE_SHADOW1D] = { 1, 0, 0, 1 }, - [TGSI_TEXTURE_SHADOW2D] = { 2, 0, 0, 1 }, - [TGSI_TEXTURE_SHADOWRECT] = { 2, 0, 0, 1 }, - [TGSI_TEXTURE_1D_ARRAY] = { 1, 0, 1, 0 }, - [TGSI_TEXTURE_2D_ARRAY] = { 2, 0, 1, 0 }, - [TGSI_TEXTURE_SHADOW1D_ARRAY] = { 1, 0, 1, 1 }, - [TGSI_TEXTURE_SHADOW2D_ARRAY] = { 2, 0, 1, 1 }, - [TGSI_TEXTURE_SHADOWCUBE] = { 3, 1, 0, 1 }, - [TGSI_TEXTURE_2D_MSAA] = { 2, 0, 0, 0 }, - [TGSI_TEXTURE_2D_ARRAY_MSAA] = { 2, 0, 1, 0 }, - [TGSI_TEXTURE_CUBE_ARRAY] = { 3, 1, 1, 0 }, - [TGSI_TEXTURE_SHADOWCUBE_ARRAY] = { 3, 1, 1, 1 }, -}; - -static void -fill_tex_info(struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst, - struct tex_info *info) -{ - const struct target_info *tgt = &tex_targets[inst->Texture.Texture]; - - if (tgt->dims == 3) - info->flags |= IR3_INSTR_3D; - if (tgt->array) - info->flags |= IR3_INSTR_A; - if (tgt->shadow) - info->flags |= IR3_INSTR_S; - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_TXB: - case TGSI_OPCODE_TXB2: - case TGSI_OPCODE_TXL: - case TGSI_OPCODE_TXF: - info->args = 2; - break; - case TGSI_OPCODE_TXP: - info->flags |= IR3_INSTR_P; - /* fallthrough */ - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXD: - info->args = 1; - break; - } - - /* - * lay out the first argument in the proper order: - * - actual coordinates first - * - shadow reference - * - array index - * - projection w - * - * bias/lod go into the second arg - */ - int arg, pos = 0; - for (arg = 0; arg < tgt->dims; arg++) - info->order[arg] = pos++; - if (tgt->dims == 1) - info->order[pos++] = -1; - if (tgt->shadow) - info->order[pos++] = MAX2(arg + tgt->array, 2); - if (tgt->array) - info->order[pos++] = arg++; - if (info->flags & IR3_INSTR_P) - info->order[pos++] = 3; - - info->src_wrmask = (1 << pos) - 1; - - for (; pos < 4; pos++) - info->order[pos] = -1; - - assert(pos <= 4); -} - -static bool check_swiz(struct tgsi_src_register *src, const 
int8_t order[4]) -{ - unsigned i; - for (i = 1; (i < 4) && order[i] >= 0; i++) - if (src_swiz(src, i) != (src_swiz(src, 0) + order[i])) - return false; - return true; -} - -static bool is_1d(unsigned tex) -{ - return tex_targets[tex].dims == 1; -} - -static struct tgsi_src_register * -get_tex_coord(struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst, - const struct tex_info *tinf) -{ - struct tgsi_src_register *coord = &inst->Src[0].Register; - struct ir3_instruction *instr; - unsigned tex = inst->Texture.Texture; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - type_t type_mov = get_ftype(ctx); - unsigned j; - - /* need to move things around: */ - tmp_src = get_internal_temp(ctx, &tmp_dst); - - for (j = 0; j < 4; j++) { - if (tinf->order[j] < 0) - continue; - instr = instr_create(ctx, 1, 0); /* mov */ - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, &tmp_dst, j); - add_src_reg(ctx, instr, coord, - src_swiz(coord, tinf->order[j])); - } - - /* fix up .y coord: */ - if (is_1d(tex)) { - struct ir3_register *imm; - instr = instr_create(ctx, 1, 0); /* mov */ - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, &tmp_dst, 1); /* .y */ - imm = ir3_reg_create(instr, 0, IR3_REG_IMMED); - if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) - imm->iim_val = 0; - else - imm->fim_val = 0.5; - } - - return tmp_src; -} - -static void -trans_samp(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr, *collect; - struct ir3_register *reg; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *orig, *coord, *samp, *offset, *dpdx, *dpdy; - struct tgsi_src_register zero; - const struct target_info *tgt = &tex_targets[inst->Texture.Texture]; - struct tex_info tinf; - int i; - - memset(&tinf, 0, sizeof(tinf)); - fill_tex_info(ctx, inst, 
&tinf); - coord = get_tex_coord(ctx, inst, &tinf); - get_immediate(ctx, &zero, 0); - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_TXB2: - orig = &inst->Src[1].Register; - samp = &inst->Src[2].Register; - break; - case TGSI_OPCODE_TXD: - orig = &inst->Src[0].Register; - dpdx = &inst->Src[1].Register; - dpdy = &inst->Src[2].Register; - samp = &inst->Src[3].Register; - if (is_rel_or_const(dpdx)) - dpdx = get_unconst(ctx, dpdx); - if (is_rel_or_const(dpdy)) - dpdy = get_unconst(ctx, dpdy); - break; - default: - orig = &inst->Src[0].Register; - samp = &inst->Src[1].Register; - break; - } - if (tinf.args > 1 && is_rel_or_const(orig)) - orig = get_unconst(ctx, orig); - - /* scale up integer coords for TXF based on the LOD */ - if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - type_t type_mov = get_utype(ctx); - - tmp_src = get_internal_temp(ctx, &tmp_dst); - for (i = 0; i < tgt->dims; i++) { - instr = instr_create(ctx, 2, OPC_SHL_B); - add_dst_reg(ctx, instr, &tmp_dst, i); - add_src_reg(ctx, instr, coord, src_swiz(coord, i)); - add_src_reg(ctx, instr, orig, orig->SwizzleW); - } - if (tgt->dims < 2) { - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, &tmp_dst, i); - add_src_reg(ctx, instr, &zero, zero.SwizzleX); - i++; - } - if (tgt->array) { - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, &tmp_dst, i); - add_src_reg(ctx, instr, coord, src_swiz(coord, i)); - } - coord = tmp_src; - } - - if (inst->Texture.NumOffsets) { - struct tgsi_texture_offset *tex_offset = &inst->TexOffsets[0]; - struct tgsi_src_register offset_src = {0}; - - offset_src.File = tex_offset->File; - offset_src.Index = tex_offset->Index; - offset_src.SwizzleX = tex_offset->SwizzleX; - offset_src.SwizzleY = tex_offset->SwizzleY; - offset_src.SwizzleZ = 
tex_offset->SwizzleZ; - offset = get_unconst(ctx, &offset_src); - tinf.flags |= IR3_INSTR_O; - } - - instr = instr_create(ctx, 5, t->opc); - if (ctx->integer_s & (1 << samp->Index)) - instr->cat5.type = get_utype(ctx); - else - instr->cat5.type = get_ftype(ctx); - instr->cat5.samp = samp->Index; - instr->cat5.tex = samp->Index; - instr->flags |= tinf.flags; - - add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask); - - reg = ir3_reg_create(instr, 0, IR3_REG_SSA); - - collect = ir3_instr_create2(ctx->block, -1, OPC_META_FI, 12); - ir3_reg_create(collect, 0, 0); - for (i = 0; i < 4; i++) { - if (tinf.src_wrmask & (1 << i)) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), - coord, src_swiz(coord, i)); - else if (tinf.src_wrmask & ~((1 << i) - 1)) - ir3_reg_create(collect, 0, 0); - } - - /* Attach derivatives onto the end of the fan-in. Derivatives start after - * the 4th argument, so make sure that fi is padded up to 4 first. - */ - if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { - while (collect->regs_count < 5) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), - &zero, zero.SwizzleX); - for (i = 0; i < tgt->dims; i++) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdx, i); - if (tgt->dims < 2) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), - &zero, zero.SwizzleX); - for (i = 0; i < tgt->dims; i++) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdy, i); - if (tgt->dims < 2) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), - &zero, zero.SwizzleX); - tinf.src_wrmask |= ((1 << (2 * MAX2(tgt->dims, 2))) - 1) << 4; - } - - reg->instr = collect; - reg->wrmask = tinf.src_wrmask; - - /* The second argument contains the offsets, followed by the lod/bias - * argument. This is constructed more manually due to the dynamic nature. 
- */ - if (inst->Texture.NumOffsets == 0 && tinf.args == 1) - return; - - reg = ir3_reg_create(instr, 0, IR3_REG_SSA); - - collect = ir3_instr_create2(ctx->block, -1, OPC_META_FI, 5); - ir3_reg_create(collect, 0, 0); - - if (inst->Texture.NumOffsets) { - for (i = 0; i < tgt->dims; i++) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), - offset, i); - if (tgt->dims < 2) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), - &zero, zero.SwizzleX); - } - if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), - orig, orig->SwizzleX); - else if (tinf.args > 1) - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), - orig, orig->SwizzleW); - - reg->instr = collect; - reg->wrmask = (1 << (collect->regs_count - 1)) - 1; -} - -static void -trans_txq(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *level = &inst->Src[0].Register; - struct tgsi_src_register *samp = &inst->Src[1].Register; - const struct target_info *tgt = &tex_targets[inst->Texture.Texture]; - struct tex_info tinf; - - memset(&tinf, 0, sizeof(tinf)); - fill_tex_info(ctx, inst, &tinf); - if (is_rel_or_const(level)) - level = get_unconst(ctx, level); - - instr = instr_create(ctx, 5, OPC_GETSIZE); - instr->cat5.type = get_utype(ctx); - instr->cat5.samp = samp->Index; - instr->cat5.tex = samp->Index; - instr->flags |= tinf.flags; - - if (tgt->array && (dst->WriteMask & (1 << tgt->dims))) { - /* Array size actually ends up in .w rather than .z. This doesn't - * matter for miplevel 0, but for higher mips the value in z is - * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is - * returned, which means that we have to add 1 to it for arrays. 
- */ - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - type_t type_mov = get_utype(ctx); - - tmp_src = get_internal_temp(ctx, &tmp_dst); - add_dst_reg_wrmask(ctx, instr, &tmp_dst, 0, - dst->WriteMask | TGSI_WRITEMASK_W); - add_src_reg_wrmask(ctx, instr, level, level->SwizzleX, 0x1); - - if (dst->WriteMask & TGSI_WRITEMASK_X) { - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, dst, 0); - add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 0)); - } - - if (tgt->dims == 2) { - if (dst->WriteMask & TGSI_WRITEMASK_Y) { - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, dst, 1); - add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 1)); - } - } - - instr = instr_create(ctx, 2, OPC_ADD_U); - add_dst_reg(ctx, instr, dst, tgt->dims); - add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 3)); - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1; - } else { - add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask); - add_src_reg_wrmask(ctx, instr, level, level->SwizzleX, 0x1); - } - - if (dst->WriteMask & TGSI_WRITEMASK_W) { - /* The # of levels comes from getinfo.z. We need to add 1 to it, since - * the value in TEX_CONST_0 is zero-based. 
- */ - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - - tmp_src = get_internal_temp(ctx, &tmp_dst); - instr = instr_create(ctx, 5, OPC_GETINFO); - instr->cat5.type = get_utype(ctx); - instr->cat5.samp = samp->Index; - instr->cat5.tex = samp->Index; - add_dst_reg_wrmask(ctx, instr, &tmp_dst, 0, TGSI_WRITEMASK_Z); - - instr = instr_create(ctx, 2, OPC_ADD_U); - add_dst_reg(ctx, instr, dst, 3); - add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 2)); - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1; - } -} - -/* DDX/DDY */ -static void -trans_deriv(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *src = &inst->Src[0].Register; - static const int8_t order[4] = {0, 1, 2, 3}; - - if (!check_swiz(src, order)) { - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - - tmp_src = get_internal_temp(ctx, &tmp_dst); - create_mov(ctx, &tmp_dst, src); - - src = tmp_src; - } - - /* This might be a workaround for hw bug? Blob compiler always - * seems to work two components at a time for dsy/dsx. It does - * actually seem to work in some cases (or at least some piglit - * tests) for four components at a time. But seems more reliable - * to split this into two instructions like the blob compiler - * does: - */ - - instr = instr_create(ctx, 5, t->opc); - instr->cat5.type = get_ftype(ctx); - add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask & 0x3); - add_src_reg_wrmask(ctx, instr, src, 0, dst->WriteMask & 0x3); - - instr = instr_create(ctx, 5, t->opc); - instr->cat5.type = get_ftype(ctx); - add_dst_reg_wrmask(ctx, instr, dst, 2, (dst->WriteMask >> 2) & 0x3); - add_src_reg_wrmask(ctx, instr, src, 2, (dst->WriteMask >> 2) & 0x3); -} - -/* - * SEQ(a,b) = (a == b) ? 1.0 : 0.0 - * cmps.f.eq tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SNE(a,b) = (a != b) ? 
1.0 : 0.0 - * cmps.f.ne tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SGE(a,b) = (a >= b) ? 1.0 : 0.0 - * cmps.f.ge tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SLE(a,b) = (a <= b) ? 1.0 : 0.0 - * cmps.f.le tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SGT(a,b) = (a > b) ? 1.0 : 0.0 - * cmps.f.gt tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SLT(a,b) = (a < b) ? 1.0 : 0.0 - * cmps.f.lt tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * CMP(a,b,c) = (a < 0.0) ? b : c - * cmps.f.lt tmp0, a, {0.0} - * sel.b16 dst, b, tmp0, c - */ -static void -trans_cmp(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_src_register constval0; - /* final instruction for CMP() uses orig src1 and src2: */ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *a0, *a1, *a2; - unsigned condition; - - tmp_src = get_internal_temp(ctx, &tmp_dst); - - a0 = &inst->Src[0].Register; /* a */ - a1 = &inst->Src[1].Register; /* b */ - - switch (t->tgsi_opc) { - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_FSEQ: - condition = IR3_COND_EQ; - break; - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_FSNE: - condition = IR3_COND_NE; - break; - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_FSGE: - condition = IR3_COND_GE; - break; - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_FSLT: - condition = IR3_COND_LT; - break; - case TGSI_OPCODE_SLE: - condition = IR3_COND_LE; - break; - case TGSI_OPCODE_SGT: - condition = IR3_COND_GT; - break; - case TGSI_OPCODE_CMP: - get_immediate(ctx, &constval0, fui(0.0)); - a0 = &inst->Src[0].Register; /* a */ - a1 = &constval0; /* {0.0} */ - condition = IR3_COND_LT; - break; - default: - compile_assert(ctx, 0); - return; - } - - if (is_const(a0) && is_const(a1)) - a0 = get_unconst(ctx, a0); - - /* cmps.f. 
tmp, a0, a1 */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - instr->cat2.condition = condition; - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); - - switch (t->tgsi_opc) { - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLT: - /* cov.u16f16 dst, tmp0 */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_utype(ctx); - instr->cat1.dst_type = get_ftype(ctx); - vectorize(ctx, instr, dst, 1, tmp_src, 0); - break; - case TGSI_OPCODE_FSEQ: - case TGSI_OPCODE_FSGE: - case TGSI_OPCODE_FSNE: - case TGSI_OPCODE_FSLT: - /* absneg.s dst, (neg)tmp0 */ - instr = instr_create(ctx, 2, OPC_ABSNEG_S); - vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_SNEG); - break; - case TGSI_OPCODE_CMP: - a1 = &inst->Src[1].Register; - a2 = &inst->Src[2].Register; - /* sel.{b32,b16} dst, src2, tmp, src1 */ - instr = instr_create(ctx, 3, OPC_SEL_B32); - vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0); - - break; - } - - put_dst(ctx, inst, dst); -} - -/* - * USNE(a,b) = (a != b) ? ~0 : 0 - * cmps.u32.ne dst, a, b - * - * USEQ(a,b) = (a == b) ? ~0 : 0 - * cmps.u32.eq dst, a, b - * - * ISGE(a,b) = (a > b) ? ~0 : 0 - * cmps.s32.ge dst, a, b - * - * USGE(a,b) = (a > b) ? ~0 : 0 - * cmps.u32.ge dst, a, b - * - * ISLT(a,b) = (a < b) ? ~0 : 0 - * cmps.s32.lt dst, a, b - * - * USLT(a,b) = (a < b) ? 
~0 : 0 - * cmps.u32.lt dst, a, b - * - */ -static void -trans_icmp(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_src_register *a0, *a1; - unsigned condition; - - a0 = &inst->Src[0].Register; /* a */ - a1 = &inst->Src[1].Register; /* b */ - - switch (t->tgsi_opc) { - case TGSI_OPCODE_USNE: - condition = IR3_COND_NE; - break; - case TGSI_OPCODE_USEQ: - condition = IR3_COND_EQ; - break; - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_USGE: - condition = IR3_COND_GE; - break; - case TGSI_OPCODE_ISLT: - case TGSI_OPCODE_USLT: - condition = IR3_COND_LT; - break; - - default: - compile_assert(ctx, 0); - return; - } - - if (is_const(a0) && is_const(a1)) - a0 = get_unconst(ctx, a0); - - tmp_src = get_internal_temp(ctx, &tmp_dst); - /* cmps.{u32,s32}. tmp, a0, a1 */ - instr = instr_create(ctx, 2, t->opc); - instr->cat2.condition = condition; - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); - - /* absneg.s dst, (neg)tmp */ - instr = instr_create(ctx, 2, OPC_ABSNEG_S); - vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_SNEG); - - put_dst(ctx, inst, dst); -} - -/* - * UCMP(a,b,c) = a ? b : c - * sel.b16 dst, b, a, c - */ -static void -trans_ucmp(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *a0, *a1, *a2; - - a0 = &inst->Src[0].Register; /* a */ - a1 = &inst->Src[1].Register; /* b */ - a2 = &inst->Src[2].Register; /* c */ - - if (is_rel_or_const(a0)) - a0 = get_unconst(ctx, a0); - - /* sel.{b32,b16} dst, b, a, c */ - instr = instr_create(ctx, 3, OPC_SEL_B32); - vectorize(ctx, instr, dst, 3, a1, 0, a0, 0, a2, 0); - put_dst(ctx, inst, dst); -} - -/* - * ISSG(a) = a < 0 ? 
-1 : a > 0 ? 1 : 0 - * cmps.s.lt tmp_neg, a, 0 # 1 if a is negative - * cmps.s.gt tmp_pos, a, 0 # 1 if a is positive - * sub.u dst, tmp_pos, tmp_neg - */ -static void -trans_issg(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *a = &inst->Src[0].Register; - struct tgsi_dst_register neg_dst, pos_dst; - struct tgsi_src_register *neg_src, *pos_src; - - neg_src = get_internal_temp(ctx, &neg_dst); - pos_src = get_internal_temp(ctx, &pos_dst); - - /* cmps.s.lt neg, a, 0 */ - instr = instr_create(ctx, 2, OPC_CMPS_S); - instr->cat2.condition = IR3_COND_LT; - vectorize(ctx, instr, &neg_dst, 2, a, 0, 0, IR3_REG_IMMED); - - /* cmps.s.gt pos, a, 0 */ - instr = instr_create(ctx, 2, OPC_CMPS_S); - instr->cat2.condition = IR3_COND_GT; - vectorize(ctx, instr, &pos_dst, 2, a, 0, 0, IR3_REG_IMMED); - - /* sub.u dst, pos, neg */ - instr = instr_create(ctx, 2, OPC_SUB_U); - vectorize(ctx, instr, dst, 2, pos_src, 0, neg_src, 0); - - put_dst(ctx, inst, dst); -} - - - -/* - * Conditional / Flow control - */ - -static void -push_branch(struct ir3_compile_context *ctx, bool inv, - struct ir3_instruction *instr, struct ir3_instruction *cond) -{ - unsigned int idx = ctx->branch_count++; - compile_assert(ctx, idx < ARRAY_SIZE(ctx->branch)); - ctx->branch[idx].instr = instr; - ctx->branch[idx].inv = inv; - /* else side of branch has same condition: */ - if (!inv) - ctx->branch[idx].cond = cond; -} - -static struct ir3_instruction * -pop_branch(struct ir3_compile_context *ctx) -{ - unsigned int idx = --ctx->branch_count; - return ctx->branch[idx].instr; -} - -static void -trans_if(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr, *cond; - struct tgsi_src_register *src = &inst->Src[0].Register; - struct tgsi_dst_register tmp_dst; - 
struct tgsi_src_register *tmp_src; - struct tgsi_src_register constval; - - get_immediate(ctx, &constval, fui(0.0)); - tmp_src = get_internal_temp(ctx, &tmp_dst); - - if (is_const(src)) - src = get_unconst(ctx, src); - - /* cmps.{f,u}.ne tmp0, b, {0.0} */ - instr = instr_create(ctx, 2, t->opc); - add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, src, src->SwizzleX); - add_src_reg(ctx, instr, &constval, constval.SwizzleX); - instr->cat2.condition = IR3_COND_NE; - - compile_assert(ctx, instr->regs[1]->flags & IR3_REG_SSA); /* because get_unconst() */ - cond = instr->regs[1]->instr; - - /* meta:flow tmp0 */ - instr = instr_create(ctx, -1, OPC_META_FLOW); - ir3_reg_create(instr, 0, 0); /* dummy dst */ - add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X); - - push_branch(ctx, false, instr, cond); - instr->flow.if_block = push_block(ctx); -} - -static void -trans_else(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - - pop_block(ctx); - - instr = pop_branch(ctx); - - compile_assert(ctx, (instr->category == -1) && - (instr->opc == OPC_META_FLOW)); - - push_branch(ctx, true, instr, NULL); - instr->flow.else_block = push_block(ctx); -} - -static struct ir3_instruction * -find_temporary(struct ir3_block *block, unsigned n) -{ - if (block->parent && !block->temporaries[n]) - return find_temporary(block->parent, n); - return block->temporaries[n]; -} - -static struct ir3_instruction * -find_output(struct ir3_block *block, unsigned n) -{ - if (block->parent && !block->outputs[n]) - return find_output(block->parent, n); - return block->outputs[n]; -} - -static struct ir3_instruction * -create_phi(struct ir3_compile_context *ctx, struct ir3_instruction *cond, - struct ir3_instruction *a, struct ir3_instruction *b) -{ - struct ir3_instruction *phi; - - compile_assert(ctx, cond); - - /* Either side of the condition could be null.. 
which - * indicates a variable written on only one side of the - * branch. Normally this should only be variables not - * used outside of that side of the branch. So we could - * just 'return a ? a : b;' in that case. But for better - * defined undefined behavior we just stick in imm{0.0}. - * In the common case of a value only used within the - * one side of the branch, the PHI instruction will not - * get scheduled - */ - if (!a) - a = create_immed(ctx, 0.0); - if (!b) - b = create_immed(ctx, 0.0); - - phi = instr_create(ctx, -1, OPC_META_PHI); - ir3_reg_create(phi, 0, 0); /* dummy dst */ - ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = cond; - ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = a; - ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = b; - - return phi; -} - -static void -trans_endif(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct ir3_block *ifb, *elseb; - struct ir3_instruction **ifout, **elseout; - unsigned i, ifnout = 0, elsenout = 0; - - pop_block(ctx); - - instr = pop_branch(ctx); - - compile_assert(ctx, (instr->category == -1) && - (instr->opc == OPC_META_FLOW)); - - ifb = instr->flow.if_block; - elseb = instr->flow.else_block; - /* if there is no else block, the parent block is used for the - * branch-not-taken src of the PHI instructions: - */ - if (!elseb) - elseb = ifb->parent; - - /* worst case sizes: */ - ifnout = ifb->ntemporaries + ifb->noutputs; - elsenout = elseb->ntemporaries + elseb->noutputs; - - ifout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * ifnout); - if (elseb != ifb->parent) - elseout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * elsenout); - - ifnout = 0; - elsenout = 0; - - /* generate PHI instructions for any temporaries written: */ - for (i = 0; i < ifb->ntemporaries; i++) { - struct ir3_instruction *a = ifb->temporaries[i]; - struct ir3_instruction *b = elseb->temporaries[i]; - - /* if temporary written in if-block, or if 
else block - * is present and temporary written in else-block: - */ - if (a || ((elseb != ifb->parent) && b)) { - struct ir3_instruction *phi; - - /* if only written on one side, find the closest - * enclosing update on other side: - */ - if (!a) - a = find_temporary(ifb, i); - if (!b) - b = find_temporary(elseb, i); - - ifout[ifnout] = a; - a = create_output(ifb, a, ifnout++); - - if (elseb != ifb->parent) { - elseout[elsenout] = b; - b = create_output(elseb, b, elsenout++); - } - - phi = create_phi(ctx, instr, a, b); - ctx->block->temporaries[i] = phi; - } - } - - compile_assert(ctx, ifb->noutputs == elseb->noutputs); - - /* .. and any outputs written: */ - for (i = 0; i < ifb->noutputs; i++) { - struct ir3_instruction *a = ifb->outputs[i]; - struct ir3_instruction *b = elseb->outputs[i]; - - /* if output written in if-block, or if else block - * is present and output written in else-block: - */ - if (a || ((elseb != ifb->parent) && b)) { - struct ir3_instruction *phi; - - /* if only written on one side, find the closest - * enclosing update on other side: - */ - if (!a) - a = find_output(ifb, i); - if (!b) - b = find_output(elseb, i); - - ifout[ifnout] = a; - a = create_output(ifb, a, ifnout++); - - if (elseb != ifb->parent) { - elseout[elsenout] = b; - b = create_output(elseb, b, elsenout++); - } - - phi = create_phi(ctx, instr, a, b); - ctx->block->outputs[i] = phi; - } - } - - ifb->noutputs = ifnout; - ifb->outputs = ifout; - - if (elseb != ifb->parent) { - elseb->noutputs = elsenout; - elseb->outputs = elseout; - } - - // TODO maybe we want to compact block->inputs? 
-} - -/* - * Kill - */ - -static void -trans_kill(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr, *immed, *cond = NULL; - bool inv = false; - - /* unconditional kill, use enclosing if condition: */ - if (ctx->branch_count > 0) { - unsigned int idx = ctx->branch_count - 1; - cond = ctx->branch[idx].cond; - inv = ctx->branch[idx].inv; - } else { - cond = create_immed(ctx, 1.0); - } - - compile_assert(ctx, cond); - - immed = create_immed(ctx, 0.0); - - /* cmps.f.ne p0.x, cond, {0.0} */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - instr->cat2.condition = IR3_COND_NE; - ir3_reg_create(instr, regid(REG_P0, 0), 0); - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed; - cond = instr; - - /* kill p0.x */ - instr = instr_create(ctx, 0, OPC_KILL); - instr->cat0.inv = inv; - ir3_reg_create(instr, 0, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; - - ctx->kill[ctx->kill_count++] = instr; - - ctx->so->has_kill = true; -} - -/* - * Kill-If - */ - -static void -trans_killif(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_src_register *src = &inst->Src[0].Register; - struct ir3_instruction *instr, *immed, *cond = NULL; - bool inv = false; - - immed = create_immed(ctx, 0.0); - - /* cmps.f.ne p0.x, cond, {0.0} */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - instr->cat2.condition = IR3_COND_NE; - ir3_reg_create(instr, regid(REG_P0, 0), 0); - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed; - add_src_reg(ctx, instr, src, src->SwizzleX); - - cond = instr; - - /* kill p0.x */ - instr = instr_create(ctx, 0, OPC_KILL); - instr->cat0.inv = inv; - ir3_reg_create(instr, 0, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; - - ctx->kill[ctx->kill_count++] = instr; - - ctx->so->has_kill = true; - -} -/* - * I2F 
/ U2F / F2I / F2U - */ - -static void -trans_cov(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src = &inst->Src[0].Register; - - // cov.f32s32 dst, tmp0 / - instr = instr_create(ctx, 1, 0); - switch (t->tgsi_opc) { - case TGSI_OPCODE_U2F: - instr->cat1.src_type = TYPE_U32; - instr->cat1.dst_type = TYPE_F32; - break; - case TGSI_OPCODE_I2F: - instr->cat1.src_type = TYPE_S32; - instr->cat1.dst_type = TYPE_F32; - break; - case TGSI_OPCODE_F2U: - instr->cat1.src_type = TYPE_F32; - instr->cat1.dst_type = TYPE_U32; - break; - case TGSI_OPCODE_F2I: - instr->cat1.src_type = TYPE_F32; - instr->cat1.dst_type = TYPE_S32; - break; - - } - vectorize(ctx, instr, dst, 1, src, 0); - put_dst(ctx, inst, dst); -} - -/* - * UMUL / UMAD - * - * There is no 32-bit multiply instruction, so splitting a and b into high and - * low components, we get that - * - * dst = al * bl + ah * bl << 16 + al * bh << 16 - * - * mull.u tmp0, a, b (mul low, i.e. al * bl) - * madsh.m16 tmp1, a, b, tmp0 (mul-add shift high mix, i.e. ah * bl << 16) - * madsh.m16 dst, b, a, tmp1 (i.e. al * bh << 16) - * - * For UMAD, add in the extra argument after mull.u. 
- */ -static void -trans_umul(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *a = &inst->Src[0].Register; - struct tgsi_src_register *b = &inst->Src[1].Register; - - struct tgsi_dst_register tmp0_dst, tmp1_dst; - struct tgsi_src_register *tmp0_src, *tmp1_src; - - tmp0_src = get_internal_temp(ctx, &tmp0_dst); - tmp1_src = get_internal_temp(ctx, &tmp1_dst); - - if (is_rel_or_const(a)) - a = get_unconst(ctx, a); - if (is_rel_or_const(b)) - b = get_unconst(ctx, b); - - /* mull.u tmp0, a, b */ - instr = instr_create(ctx, 2, OPC_MULL_U); - vectorize(ctx, instr, &tmp0_dst, 2, a, 0, b, 0); - - if (t->tgsi_opc == TGSI_OPCODE_UMAD) { - struct tgsi_src_register *c = &inst->Src[2].Register; - - /* add.u tmp0, tmp0, c */ - instr = instr_create(ctx, 2, OPC_ADD_U); - vectorize(ctx, instr, &tmp0_dst, 2, tmp0_src, 0, c, 0); - } - - /* madsh.m16 tmp1, a, b, tmp0 */ - instr = instr_create(ctx, 3, OPC_MADSH_M16); - vectorize(ctx, instr, &tmp1_dst, 3, a, 0, b, 0, tmp0_src, 0); - - /* madsh.m16 dst, b, a, tmp1 */ - instr = instr_create(ctx, 3, OPC_MADSH_M16); - vectorize(ctx, instr, dst, 3, b, 0, a, 0, tmp1_src, 0); - put_dst(ctx, inst, dst); -} - -/* - * IDIV / UDIV / MOD / UMOD - * - * See NV50LegalizeSSA::handleDIV for the origin of this implementation. For - * MOD/UMOD, it becomes a - [IU]DIV(a, modulus) * modulus. 
- */ -static void -trans_idiv(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = get_dst(ctx, inst), *premod_dst = dst; - struct tgsi_src_register *a = &inst->Src[0].Register; - struct tgsi_src_register *b = &inst->Src[1].Register; - - struct tgsi_dst_register af_dst, bf_dst, q_dst, r_dst, a_dst, b_dst; - struct tgsi_src_register *af_src, *bf_src, *q_src, *r_src, *a_src, *b_src; - - struct tgsi_src_register negative_2, thirty_one; - type_t src_type; - - if (t->tgsi_opc == TGSI_OPCODE_IDIV || t->tgsi_opc == TGSI_OPCODE_MOD) - src_type = get_stype(ctx); - else - src_type = get_utype(ctx); - - af_src = get_internal_temp(ctx, &af_dst); - bf_src = get_internal_temp(ctx, &bf_dst); - q_src = get_internal_temp(ctx, &q_dst); - r_src = get_internal_temp(ctx, &r_dst); - a_src = get_internal_temp(ctx, &a_dst); - b_src = get_internal_temp(ctx, &b_dst); - - get_immediate(ctx, &negative_2, -2); - get_immediate(ctx, &thirty_one, 31); - - if (t->tgsi_opc == TGSI_OPCODE_MOD || t->tgsi_opc == TGSI_OPCODE_UMOD) - premod_dst = &q_dst; - - /* cov.[us]32f32 af, numerator */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = src_type; - instr->cat1.dst_type = get_ftype(ctx); - vectorize(ctx, instr, &af_dst, 1, a, 0); - - /* cov.[us]32f32 bf, denominator */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = src_type; - instr->cat1.dst_type = get_ftype(ctx); - vectorize(ctx, instr, &bf_dst, 1, b, 0); - - /* Get the absolute values for IDIV */ - if (type_sint(src_type)) { - /* absneg.f af, (abs)af */ - instr = instr_create(ctx, 2, OPC_ABSNEG_F); - vectorize(ctx, instr, &af_dst, 1, af_src, IR3_REG_FABS); - - /* absneg.f bf, (abs)bf */ - instr = instr_create(ctx, 2, OPC_ABSNEG_F); - vectorize(ctx, instr, &bf_dst, 1, bf_src, IR3_REG_FABS); - - /* absneg.s a, (abs)numerator */ - instr = instr_create(ctx, 2, OPC_ABSNEG_S); - vectorize(ctx, instr, &a_dst, 
1, a, IR3_REG_SABS); - - /* absneg.s b, (abs)denominator */ - instr = instr_create(ctx, 2, OPC_ABSNEG_S); - vectorize(ctx, instr, &b_dst, 1, b, IR3_REG_SABS); - } else { - /* mov.u32u32 a, numerator */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = src_type; - instr->cat1.dst_type = src_type; - vectorize(ctx, instr, &a_dst, 1, a, 0); - - /* mov.u32u32 b, denominator */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = src_type; - instr->cat1.dst_type = src_type; - vectorize(ctx, instr, &b_dst, 1, b, 0); - } - - /* rcp.f bf, bf */ - instr = instr_create(ctx, 4, OPC_RCP); - vectorize(ctx, instr, &bf_dst, 1, bf_src, 0); - - /* That's right, subtract 2 as an integer from the float */ - /* add.u bf, bf, -2 */ - instr = instr_create(ctx, 2, OPC_ADD_U); - vectorize(ctx, instr, &bf_dst, 2, bf_src, 0, &negative_2, 0); - - /* mul.f q, af, bf */ - instr = instr_create(ctx, 2, OPC_MUL_F); - vectorize(ctx, instr, &q_dst, 2, af_src, 0, bf_src, 0); - - /* cov.f32[us]32 q, q */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_ftype(ctx); - instr->cat1.dst_type = src_type; - vectorize(ctx, instr, &q_dst, 1, q_src, 0); - - /* integer multiply q by b */ - /* mull.u r, q, b */ - instr = instr_create(ctx, 2, OPC_MULL_U); - vectorize(ctx, instr, &r_dst, 2, q_src, 0, b_src, 0); - - /* madsh.m16 r, q, b, r */ - instr = instr_create(ctx, 3, OPC_MADSH_M16); - vectorize(ctx, instr, &r_dst, 3, q_src, 0, b_src, 0, r_src, 0); - - /* madsh.m16, r, b, q, r */ - instr = instr_create(ctx, 3, OPC_MADSH_M16); - vectorize(ctx, instr, &r_dst, 3, b_src, 0, q_src, 0, r_src, 0); - - /* sub.u r, a, r */ - instr = instr_create(ctx, 2, OPC_SUB_U); - vectorize(ctx, instr, &r_dst, 2, a_src, 0, r_src, 0); - - /* cov.u32f32, r, r */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_utype(ctx); - instr->cat1.dst_type = get_ftype(ctx); - vectorize(ctx, instr, &r_dst, 1, r_src, 0); - - /* mul.f r, r, bf */ - instr = instr_create(ctx, 2, OPC_MUL_F); - 
vectorize(ctx, instr, &r_dst, 2, r_src, 0, bf_src, 0); - - /* cov.f32u32 r, r */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_ftype(ctx); - instr->cat1.dst_type = get_utype(ctx); - vectorize(ctx, instr, &r_dst, 1, r_src, 0); - - /* add.u q, q, r */ - instr = instr_create(ctx, 2, OPC_ADD_U); - vectorize(ctx, instr, &q_dst, 2, q_src, 0, r_src, 0); - - /* mull.u r, q, b */ - instr = instr_create(ctx, 2, OPC_MULL_U); - vectorize(ctx, instr, &r_dst, 2, q_src, 0, b_src, 0); - - /* madsh.m16 r, q, b, r */ - instr = instr_create(ctx, 3, OPC_MADSH_M16); - vectorize(ctx, instr, &r_dst, 3, q_src, 0, b_src, 0, r_src, 0); - - /* madsh.m16 r, b, q, r */ - instr = instr_create(ctx, 3, OPC_MADSH_M16); - vectorize(ctx, instr, &r_dst, 3, b_src, 0, q_src, 0, r_src, 0); - - /* sub.u r, a, r */ - instr = instr_create(ctx, 2, OPC_SUB_U); - vectorize(ctx, instr, &r_dst, 2, a_src, 0, r_src, 0); - - /* cmps.u.ge r, r, b */ - instr = instr_create(ctx, 2, OPC_CMPS_U); - instr->cat2.condition = IR3_COND_GE; - vectorize(ctx, instr, &r_dst, 2, r_src, 0, b_src, 0); - - if (type_uint(src_type)) { - /* add.u dst, q, r */ - instr = instr_create(ctx, 2, OPC_ADD_U); - vectorize(ctx, instr, premod_dst, 2, q_src, 0, r_src, 0); - } else { - /* add.u q, q, r */ - instr = instr_create(ctx, 2, OPC_ADD_U); - vectorize(ctx, instr, &q_dst, 2, q_src, 0, r_src, 0); - - /* negate result based on the original arguments */ - if (is_const(a) && is_const(b)) - a = get_unconst(ctx, a); - - /* xor.b r, numerator, denominator */ - instr = instr_create(ctx, 2, OPC_XOR_B); - vectorize(ctx, instr, &r_dst, 2, a, 0, b, 0); - - /* shr.b r, r, 31 */ - instr = instr_create(ctx, 2, OPC_SHR_B); - vectorize(ctx, instr, &r_dst, 2, r_src, 0, &thirty_one, 0); - - /* absneg.s b, (neg)q */ - instr = instr_create(ctx, 2, OPC_ABSNEG_S); - vectorize(ctx, instr, &b_dst, 1, q_src, IR3_REG_SNEG); - - /* sel.b dst, b, r, q */ - instr = instr_create(ctx, 3, OPC_SEL_B32); - vectorize(ctx, instr, premod_dst, 3, b_src, 0, 
r_src, 0, q_src, 0); - } - - if (t->tgsi_opc == TGSI_OPCODE_MOD || t->tgsi_opc == TGSI_OPCODE_UMOD) { - /* The division result will have ended up in q. */ - - if (is_rel_or_const(b)) - b = get_unconst(ctx, b); - - /* mull.u r, q, b */ - instr = instr_create(ctx, 2, OPC_MULL_U); - vectorize(ctx, instr, &r_dst, 2, q_src, 0, b, 0); - - /* madsh.m16 r, q, b, r */ - instr = instr_create(ctx, 3, OPC_MADSH_M16); - vectorize(ctx, instr, &r_dst, 3, q_src, 0, b, 0, r_src, 0); - - /* madsh.m16 r, b, q, r */ - instr = instr_create(ctx, 3, OPC_MADSH_M16); - vectorize(ctx, instr, &r_dst, 3, b, 0, q_src, 0, r_src, 0); - - /* sub.u dst, a, r */ - instr = instr_create(ctx, 2, OPC_SUB_U); - vectorize(ctx, instr, dst, 2, a, 0, r_src, 0); - } - - put_dst(ctx, inst, dst); -} - -/* - * Handlers for TGSI instructions which do have 1:1 mapping to native - * instructions: - */ - -static void -instr_cat0(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - instr_create(ctx, 0, t->opc); -} - -static void -instr_cat1(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *src = &inst->Src[0].Register; - - /* NOTE: atomic start/end, rather than in create_mov() since - * create_mov() is used already w/in atomic sequences (and - * we aren't clever enough to deal with the nesting) - */ - instr_atomic_start(ctx); - create_mov(ctx, dst, src); - instr_atomic_end(ctx); -} - -static void -instr_cat2(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct ir3_instruction *instr; - unsigned src0_flags = 0, src1_flags = 0; - - switch (t->tgsi_opc) { - case TGSI_OPCODE_ABS: - 
src0_flags = IR3_REG_FABS; - break; - case TGSI_OPCODE_IABS: - src0_flags = IR3_REG_SABS; - break; - case TGSI_OPCODE_INEG: - src0_flags = IR3_REG_SNEG; - break; - case TGSI_OPCODE_SUB: - src1_flags = IR3_REG_FNEG; - break; - } - - switch (t->opc) { - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - case OPC_CLZ_B: - case OPC_CLZ_S: - case OPC_SIGN_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_NOT_B: - case OPC_BFREV_B: - case OPC_SETRM: - case OPC_CBITS_B: - /* these only have one src reg */ - instr = instr_create(ctx, 2, t->opc); - vectorize(ctx, instr, dst, 1, src0, src0_flags); - break; - default: - if (is_const(src0) && is_const(src1)) - src0 = get_unconst(ctx, src0); - - instr = instr_create(ctx, 2, t->opc); - vectorize(ctx, instr, dst, 2, src0, src0_flags, - src1, src1_flags); - break; - } - - put_dst(ctx, inst, dst); -} - -static void -instr_cat3(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct ir3_instruction *instr; - - /* in particular, can't handle const for src1 for cat3.. 
- * for mad, we can swap first two src's if needed: - */ - if (is_rel_or_const(src1)) { - if (is_mad(t->opc) && !is_rel_or_const(src0)) { - struct tgsi_src_register *tmp; - tmp = src0; - src0 = src1; - src1 = tmp; - } else { - src1 = get_unconst(ctx, src1); - } - } - - instr = instr_create(ctx, 3, t->opc); - vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, - &inst->Src[2].Register, 0); - put_dst(ctx, inst, dst); -} - -static void -instr_cat4(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src = &inst->Src[0].Register; - struct ir3_instruction *instr; - unsigned i; - - /* seems like blob compiler avoids const as src.. */ - if (is_const(src)) - src = get_unconst(ctx, src); - - /* we need to replicate into each component: */ - for (i = 0; i < 4; i++) { - if (dst->WriteMask & (1 << i)) { - instr = instr_create(ctx, 4, t->opc); - add_dst_reg(ctx, instr, dst, i); - add_src_reg(ctx, instr, src, src->SwizzleX); - } - } - - put_dst(ctx, inst, dst); -} - -static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { -#define INSTR(n, f, ...) 
\ - [TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ } - - INSTR(MOV, instr_cat1), - INSTR(RCP, instr_cat4, .opc = OPC_RCP), - INSTR(RSQ, instr_cat4, .opc = OPC_RSQ), - INSTR(SQRT, instr_cat4, .opc = OPC_SQRT), - INSTR(MUL, instr_cat2, .opc = OPC_MUL_F), - INSTR(ADD, instr_cat2, .opc = OPC_ADD_F), - INSTR(SUB, instr_cat2, .opc = OPC_ADD_F), - INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), - INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), - INSTR(UADD, instr_cat2, .opc = OPC_ADD_U), - INSTR(IMIN, instr_cat2, .opc = OPC_MIN_S), - INSTR(UMIN, instr_cat2, .opc = OPC_MIN_U), - INSTR(IMAX, instr_cat2, .opc = OPC_MAX_S), - INSTR(UMAX, instr_cat2, .opc = OPC_MAX_U), - INSTR(AND, instr_cat2, .opc = OPC_AND_B), - INSTR(OR, instr_cat2, .opc = OPC_OR_B), - INSTR(NOT, instr_cat2, .opc = OPC_NOT_B), - INSTR(XOR, instr_cat2, .opc = OPC_XOR_B), - INSTR(UMUL, trans_umul), - INSTR(UMAD, trans_umul), - INSTR(UDIV, trans_idiv), - INSTR(IDIV, trans_idiv), - INSTR(MOD, trans_idiv), - INSTR(UMOD, trans_idiv), - INSTR(SHL, instr_cat2, .opc = OPC_SHL_B), - INSTR(USHR, instr_cat2, .opc = OPC_SHR_B), - INSTR(ISHR, instr_cat2, .opc = OPC_ASHR_B), - INSTR(IABS, instr_cat2, .opc = OPC_ABSNEG_S), - INSTR(INEG, instr_cat2, .opc = OPC_ABSNEG_S), - INSTR(AND, instr_cat2, .opc = OPC_AND_B), - INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), - INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F), - INSTR(CLAMP, trans_clamp), - INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), - INSTR(ROUND, instr_cat2, .opc = OPC_RNDNE_F), - INSTR(SSG, instr_cat2, .opc = OPC_SIGN_F), - INSTR(CEIL, instr_cat2, .opc = OPC_CEIL_F), - INSTR(ARL, trans_arl), - INSTR(UARL, trans_arl), - INSTR(EX2, instr_cat4, .opc = OPC_EXP2), - INSTR(LG2, instr_cat4, .opc = OPC_LOG2), - INSTR(ABS, instr_cat2, .opc = OPC_ABSNEG_F), - INSTR(COS, instr_cat4, .opc = OPC_COS), - INSTR(SIN, instr_cat4, .opc = OPC_SIN), - INSTR(TEX, trans_samp, .opc = OPC_SAM), - INSTR(TXP, trans_samp, .opc = OPC_SAM), - INSTR(TXB, 
trans_samp, .opc = OPC_SAMB), - INSTR(TXB2, trans_samp, .opc = OPC_SAMB), - INSTR(TXL, trans_samp, .opc = OPC_SAML), - INSTR(TXD, trans_samp, .opc = OPC_SAMGQ), - INSTR(TXF, trans_samp, .opc = OPC_ISAML), - INSTR(TXQ, trans_txq), - INSTR(DDX, trans_deriv, .opc = OPC_DSX), - INSTR(DDY, trans_deriv, .opc = OPC_DSY), - INSTR(SGT, trans_cmp), - INSTR(SLT, trans_cmp), - INSTR(FSLT, trans_cmp), - INSTR(SGE, trans_cmp), - INSTR(FSGE, trans_cmp), - INSTR(SLE, trans_cmp), - INSTR(SNE, trans_cmp), - INSTR(FSNE, trans_cmp), - INSTR(SEQ, trans_cmp), - INSTR(FSEQ, trans_cmp), - INSTR(CMP, trans_cmp), - INSTR(USNE, trans_icmp, .opc = OPC_CMPS_U), - INSTR(USEQ, trans_icmp, .opc = OPC_CMPS_U), - INSTR(ISGE, trans_icmp, .opc = OPC_CMPS_S), - INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U), - INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S), - INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U), - INSTR(UCMP, trans_ucmp), - INSTR(ISSG, trans_issg), - INSTR(IF, trans_if, .opc = OPC_CMPS_F), - INSTR(UIF, trans_if, .opc = OPC_CMPS_U), - INSTR(ELSE, trans_else), - INSTR(ENDIF, trans_endif), - INSTR(END, instr_cat0, .opc = OPC_END), - INSTR(KILL, trans_kill, .opc = OPC_KILL), - INSTR(KILL_IF, trans_killif, .opc = OPC_KILL), - INSTR(I2F, trans_cov), - INSTR(U2F, trans_cov), - INSTR(F2I, trans_cov), - INSTR(F2U, trans_cov), -}; - -static ir3_semantic -decl_semantic(const struct tgsi_declaration_semantic *sem) -{ - return ir3_semantic_name(sem->Name, sem->Index); -} - -static struct ir3_instruction * -decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid, - unsigned j, unsigned inloc, bool use_ldlv) -{ - struct ir3_instruction *instr; - struct ir3_register *src; - - if (use_ldlv) { - /* ldlv.u32 dst, l[#inloc], 1 */ - instr = instr_create(ctx, 6, OPC_LDLV); - instr->cat6.type = TYPE_U32; - instr->cat6.iim_val = 1; - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1; - - return instr; - 
} - - /* bary.f dst, #inloc, r0.x */ - instr = instr_create(ctx, 2, OPC_BARY_F); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc; - src = ir3_reg_create(instr, 0, IR3_REG_SSA); - src->wrmask = 0x3; - src->instr = ctx->frag_pos; - - return instr; -} - -/* TGSI_SEMANTIC_POSITION - * """""""""""""""""""""" - * - * For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that - * fragment shader input contains the fragment's window position. The X - * component starts at zero and always increases from left to right. - * The Y component starts at zero and always increases but Y=0 may either - * indicate the top of the window or the bottom depending on the fragment - * coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN). - * The Z coordinate ranges from 0 to 1 to represent depth from the front - * to the back of the Z buffer. The W component contains the reciprocol - * of the interpolated vertex position W component. - */ -static struct ir3_instruction * -decl_in_frag_coord(struct ir3_compile_context *ctx, unsigned regid, - unsigned j) -{ - struct ir3_instruction *instr, *src; - - compile_assert(ctx, !ctx->frag_coord[j]); - - ctx->frag_coord[j] = create_input(ctx->block, NULL, 0); - - - switch (j) { - case 0: /* .x */ - case 1: /* .y */ - /* for frag_coord, we get unsigned values.. 
we need - * to subtract (integer) 8 and divide by 16 (right- - * shift by 4) then convert to float: - */ - - /* add.s tmp, src, -8 */ - instr = instr_create(ctx, 2, OPC_ADD_S); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_coord[j]; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -8; - src = instr; - - /* shr.b tmp, tmp, 4 */ - instr = instr_create(ctx, 2, OPC_SHR_B); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4; - src = instr; - - /* mov.u32f32 dst, tmp */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = TYPE_U32; - instr->cat1.dst_type = TYPE_F32; - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - - break; - case 2: /* .z */ - case 3: /* .w */ - /* seems that we can use these as-is: */ - instr = ctx->frag_coord[j]; - break; - default: - compile_error(ctx, "invalid channel\n"); - instr = create_immed(ctx, 0.0); - break; - } - - return instr; -} - -/* TGSI_SEMANTIC_FACE - * """""""""""""""""" - * - * This label applies to fragment shader inputs only and indicates that - * the register contains front/back-face information of the form (F, 0, - * 0, 1). The first component will be positive when the fragment belongs - * to a front-facing polygon, and negative when the fragment belongs to a - * back-facing polygon. - */ -static struct ir3_instruction * -decl_in_frag_face(struct ir3_compile_context *ctx, unsigned regid, - unsigned j) -{ - struct ir3_instruction *instr, *src; - - switch (j) { - case 0: /* .x */ - compile_assert(ctx, !ctx->frag_face); - - ctx->frag_face = create_input(ctx->block, NULL, 0); - - /* for faceness, we always get -1 or 0 (int).. but TGSI expects - * positive vs negative float.. 
and piglit further seems to - * expect -1.0 or 1.0: - * - * mul.s tmp, hr0.x, 2 - * add.s tmp, tmp, 1 - * mov.s16f32, dst, tmp - * - */ - - instr = instr_create(ctx, 2, OPC_MUL_S); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_face; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; - src = instr; - - instr = instr_create(ctx, 2, OPC_ADD_S); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1; - src = instr; - - instr = instr_create(ctx, 1, 0); /* mov */ - instr->cat1.src_type = TYPE_S32; - instr->cat1.dst_type = TYPE_F32; - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - - break; - case 1: /* .y */ - case 2: /* .z */ - instr = create_immed(ctx, 0.0); - break; - case 3: /* .w */ - instr = create_immed(ctx, 1.0); - break; - default: - compile_error(ctx, "invalid channel\n"); - instr = create_immed(ctx, 0.0); - break; - } - - return instr; -} - -static void -decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned name = decl->Semantic.Name; - unsigned i; - - /* I don't think we should get frag shader input without - * semantic info? Otherwise how do inputs get linked to - * vert outputs? 
- */ - compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) || - decl->Declaration.Semantic); - - for (i = decl->Range.First; i <= decl->Range.Last; i++) { - unsigned n = so->inputs_count++; - unsigned r = regid(i, 0); - unsigned ncomp, j; - - /* we'll figure out the actual components used after scheduling */ - ncomp = 4; - - DBG("decl in -> r%d", i); - - compile_assert(ctx, n < ARRAY_SIZE(so->inputs)); - - so->inputs[n].semantic = decl_semantic(&decl->Semantic); - so->inputs[n].compmask = (1 << ncomp) - 1; - so->inputs[n].regid = r; - so->inputs[n].inloc = ctx->next_inloc; - so->inputs[n].interpolate = decl->Interp.Interpolate; - - for (j = 0; j < ncomp; j++) { - struct ir3_instruction *instr = NULL; - - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - /* for fragment shaders, POSITION and FACE are handled - * specially, not using normal varying / bary.f - */ - if (name == TGSI_SEMANTIC_POSITION) { - so->inputs[n].bary = false; - so->frag_coord = true; - instr = decl_in_frag_coord(ctx, r + j, j); - } else if (name == TGSI_SEMANTIC_FACE) { - so->inputs[n].bary = false; - so->frag_face = true; - instr = decl_in_frag_face(ctx, r + j, j); - } else { - bool use_ldlv = false; - - /* if no interpolation given, pick based on - * semantic: - */ - if (!decl->Declaration.Interpolate) { - switch (decl->Semantic.Name) { - case TGSI_SEMANTIC_COLOR: - so->inputs[n].interpolate = - TGSI_INTERPOLATE_COLOR; - break; - default: - so->inputs[n].interpolate = - TGSI_INTERPOLATE_LINEAR; - } - } - - if (ctx->flat_bypass) { - switch (so->inputs[n].interpolate) { - case TGSI_INTERPOLATE_COLOR: - if (!ctx->so->key.rasterflat) - break; - /* fallthrough */ - case TGSI_INTERPOLATE_CONSTANT: - use_ldlv = true; - break; - } - } - - so->inputs[n].bary = true; - - instr = decl_in_frag_bary(ctx, r + j, j, - so->inputs[n].inloc + j - 8, use_ldlv); - } - } else { - instr = create_input(ctx->block, NULL, (i * 4) + j); - } - - ctx->block->inputs[(i * 4) + j] = instr; - } - - if (so->inputs[n].bary || 
(ctx->type == TGSI_PROCESSOR_VERTEX)) { - ctx->next_inloc += ncomp; - so->total_in += ncomp; - } - } -} - -static void -decl_sv(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned r = regid(so->inputs_count, 0); - unsigned n = so->inputs_count++; - - DBG("decl sv -> r%d", n); - - compile_assert(ctx, n < ARRAY_SIZE(so->inputs)); - compile_assert(ctx, decl->Range.First < ARRAY_SIZE(ctx->sysval_semantics)); - - ctx->sysval_semantics[decl->Range.First] = decl->Semantic.Name; - so->inputs[n].semantic = decl_semantic(&decl->Semantic); - so->inputs[n].compmask = 1; - so->inputs[n].regid = r; - so->inputs[n].inloc = ctx->next_inloc; - so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT; - - struct ir3_instruction *instr = NULL; - - switch (decl->Semantic.Name) { - case TGSI_SEMANTIC_VERTEXID_NOBASE: - ctx->vertex_id = instr = create_input(ctx->block, NULL, r); - break; - case TGSI_SEMANTIC_BASEVERTEX: - ctx->basevertex = instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_stype(ctx); - instr->cat1.dst_type = get_stype(ctx); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, regid(so->first_driver_param + 4, 0), - IR3_REG_CONST); - break; - case TGSI_SEMANTIC_INSTANCEID: - ctx->instance_id = instr = create_input(ctx->block, NULL, r); - break; - default: - compile_error(ctx, "Unknown semantic: %s\n", - tgsi_semantic_names[decl->Semantic.Name]); - } - - ctx->block->inputs[r] = instr; - ctx->next_inloc++; - so->total_in++; -} - -static void -decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned comp = 0; - unsigned name = decl->Semantic.Name; - unsigned i; - - compile_assert(ctx, decl->Declaration.Semantic); - - DBG("decl out[%d] -> r%d", name, decl->Range.First); - - if (ctx->type == TGSI_PROCESSOR_VERTEX) { - switch (name) { - case TGSI_SEMANTIC_POSITION: - so->writes_pos = true; - break; - case 
TGSI_SEMANTIC_PSIZE: - so->writes_psize = true; - break; - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_BCOLOR: - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_TEXCOORD: - break; - default: - compile_error(ctx, "unknown VS semantic name: %s\n", - tgsi_semantic_names[name]); - } - } else { - switch (name) { - case TGSI_SEMANTIC_POSITION: - comp = 2; /* tgsi will write to .z component */ - so->writes_pos = true; - break; - case TGSI_SEMANTIC_COLOR: - break; - default: - compile_error(ctx, "unknown FS semantic name: %s\n", - tgsi_semantic_names[name]); - } - } - - for (i = decl->Range.First; i <= decl->Range.Last; i++) { - unsigned n = so->outputs_count++; - unsigned ncomp, j; - - ncomp = 4; - - compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); - - so->outputs[n].semantic = decl_semantic(&decl->Semantic); - so->outputs[n].regid = regid(i, comp); - - /* avoid undefined outputs, stick a dummy mov from imm{0.0}, - * which if the output is actually assigned will be over- - * written - */ - for (j = 0; j < ncomp; j++) - ctx->block->outputs[(i * 4) + j] = create_immed(ctx, 0.0); - } -} - -/* from TGSI perspective, we actually have inputs. But most of the "inputs" - * for a fragment shader are just bary.f instructions. The *actual* inputs - * from the hw perspective are the frag_pos and optionally frag_coord and - * frag_face. - */ -static void -fixup_frag_inputs(struct ir3_compile_context *ctx) -{ - struct ir3_shader_variant *so = ctx->so; - struct ir3_block *block = ctx->block; - struct ir3_instruction **inputs; - struct ir3_instruction *instr; - int n, regid = 0; - - block->ninputs = 0; - - n = 4; /* always have frag_pos */ - n += COND(so->frag_face, 4); - n += COND(so->frag_coord, 4); - - inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *))); - - if (so->frag_face) { - /* this ultimately gets assigned to hr0.x so doesn't conflict - * with frag_coord/frag_pos.. 
- */ - inputs[block->ninputs++] = ctx->frag_face; - ctx->frag_face->regs[0]->num = 0; - - /* remaining channels not used, but let's avoid confusing - * other parts that expect inputs to come in groups of vec4 - */ - inputs[block->ninputs++] = NULL; - inputs[block->ninputs++] = NULL; - inputs[block->ninputs++] = NULL; - } - - /* since we don't know where to set the regid for frag_coord, - * we have to use r0.x for it. But we don't want to *always* - * use r1.x for frag_pos as that could increase the register - * footprint on simple shaders: - */ - if (so->frag_coord) { - ctx->frag_coord[0]->regs[0]->num = regid++; - ctx->frag_coord[1]->regs[0]->num = regid++; - ctx->frag_coord[2]->regs[0]->num = regid++; - ctx->frag_coord[3]->regs[0]->num = regid++; - - inputs[block->ninputs++] = ctx->frag_coord[0]; - inputs[block->ninputs++] = ctx->frag_coord[1]; - inputs[block->ninputs++] = ctx->frag_coord[2]; - inputs[block->ninputs++] = ctx->frag_coord[3]; - } - - /* we always have frag_pos: */ - so->pos_regid = regid; - - /* r0.x */ - instr = create_input(block, NULL, block->ninputs); - instr->regs[0]->num = regid++; - inputs[block->ninputs++] = instr; - ctx->frag_pos->regs[1]->instr = instr; - - /* r0.y */ - instr = create_input(block, NULL, block->ninputs); - instr->regs[0]->num = regid++; - inputs[block->ninputs++] = instr; - ctx->frag_pos->regs[2]->instr = instr; - - block->inputs = inputs; -} - -static void -compile_instructions(struct ir3_compile_context *ctx) -{ - push_block(ctx); - - /* for fragment shader, we have a single input register (usually - * r0.xy) which is used as the base for bary.f varying fetch instrs: - */ - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->block, -1, OPC_META_FI); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */ - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */ - ctx->frag_pos = instr; - } - - while (!tgsi_parse_end_of_tokens(&ctx->parser)) { - 
tgsi_parse_token(&ctx->parser); - - switch (ctx->parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: { - struct tgsi_full_declaration *decl = - &ctx->parser.FullToken.FullDeclaration; - unsigned file = decl->Declaration.File; - if (file == TGSI_FILE_OUTPUT) { - decl_out(ctx, decl); - } else if (file == TGSI_FILE_INPUT) { - decl_in(ctx, decl); - } else if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { - decl_sv(ctx, decl); - } - - if ((file != TGSI_FILE_CONSTANT) && decl->Declaration.Array) { - int aid = decl->Array.ArrayID + ctx->array_offsets[file]; - - compile_assert(ctx, aid < ARRAY_SIZE(ctx->array)); - - /* legacy ArrayID==0 stuff probably isn't going to work - * well (and is at least untested).. let's just scream: - */ - compile_assert(ctx, aid != 0); - - ctx->array[aid].first = decl->Range.First; - ctx->array[aid].last = decl->Range.Last; - } - break; - } - case TGSI_TOKEN_TYPE_IMMEDIATE: { - /* TODO: if we know the immediate is small enough, and only - * used with instructions that can embed an immediate, we - * can skip this: - */ - struct tgsi_full_immediate *imm = - &ctx->parser.FullToken.FullImmediate; - unsigned n = ctx->so->immediates_count++; - compile_assert(ctx, n < ARRAY_SIZE(ctx->so->immediates)); - memcpy(ctx->so->immediates[n].val, imm->u, 16); - break; - } - case TGSI_TOKEN_TYPE_INSTRUCTION: { - struct tgsi_full_instruction *inst = - &ctx->parser.FullToken.FullInstruction; - unsigned opc = inst->Instruction.Opcode; - const struct instr_translater *t = &translaters[opc]; - - if (t->fxn) { - t->fxn(t, ctx, inst); - ctx->num_internal_temps = 0; - - compile_assert(ctx, !ctx->using_tmp_dst); - } else { - compile_error(ctx, "unknown TGSI opc: %s\n", - tgsi_get_opcode_name(opc)); - } - - if (inst->Instruction.Saturate) { - create_clamp_imm(ctx, &inst->Dst[0].Register, - fui(0.0), fui(1.0)); - } - - instr_finish(ctx); - - break; - } - case TGSI_TOKEN_TYPE_PROPERTY: { - struct tgsi_full_property *prop = - 
&ctx->parser.FullToken.FullProperty; - switch (prop->Property.PropertyName) { - case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: - ctx->so->color0_mrt = !!prop->u[0].Data; - break; - } - } - default: - break; - } - } -} - -int -ir3_compile_shader(struct ir3_shader_variant *so, - const struct tgsi_token *tokens, struct ir3_shader_key key, - bool cp) -{ - struct ir3_compile_context ctx; - struct ir3_block *block; - struct ir3_instruction **inputs; - unsigned i, j, actual_in; - int ret = 0, max_bary; - - assert(!so->ir); - - so->ir = ir3_create(); - - assert(so->ir); - - if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) { - DBG("INIT failed!"); - ret = -1; - goto out; - } - - /* for now, until the edge cases are worked out: */ - if (ctx.info.indirect_files_written & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT))) - cp = false; - - compile_instructions(&ctx); - - block = ctx.block; - so->ir->block = block; - - /* keep track of the inputs from TGSI perspective.. */ - inputs = block->inputs; - - /* but fixup actual inputs for frag shader: */ - if (ctx.type == TGSI_PROCESSOR_FRAGMENT) - fixup_frag_inputs(&ctx); - - /* at this point, for binning pass, throw away unneeded outputs: */ - if (key.binning_pass) { - for (i = 0, j = 0; i < so->outputs_count; i++) { - unsigned name = sem2name(so->outputs[i].semantic); - unsigned idx = sem2idx(so->outputs[i].semantic); - - /* throw away everything but first position/psize */ - if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) || - (name == TGSI_SEMANTIC_PSIZE))) { - if (i != j) { - so->outputs[j] = so->outputs[i]; - block->outputs[(j*4)+0] = block->outputs[(i*4)+0]; - block->outputs[(j*4)+1] = block->outputs[(i*4)+1]; - block->outputs[(j*4)+2] = block->outputs[(i*4)+2]; - block->outputs[(j*4)+3] = block->outputs[(i*4)+3]; - } - j++; - } - } - so->outputs_count = j; - block->noutputs = j * 4; - } - - /* if we want half-precision outputs, mark the output registers - * as half: - */ - if (key.half_precision) { - for (i = 0; i < 
block->noutputs; i++) { - if (!block->outputs[i]) - continue; - block->outputs[i]->regs[0]->flags |= IR3_REG_HALF; - } - } - - /* at this point, we want the kill's in the outputs array too, - * so that they get scheduled (since they have no dst).. we've - * already ensured that the array is big enough in push_block(): - */ - if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { - for (i = 0; i < ctx.kill_count; i++) - block->outputs[block->noutputs++] = ctx.kill[i]; - } - - ret = ir3_block_flatten(block); - if (ret < 0) { - DBG("FLATTEN failed!"); - goto out; - } - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("BEFORE CP:\n"); - ir3_print(so->ir); - } - - ir3_block_depth(block); - - /* First remove all the extra mov's (which we could skip if the - * front-end was clever enough not to insert them in the first - * place). Then figure out left/right neighbors, re-inserting - * extra mov's when needed to avoid conflicts. - */ - if (cp && !(fd_mesa_debug & FD_DBG_NOCP)) - ir3_block_cp(block); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("BEFORE GROUPING:\n"); - ir3_print(so->ir); - } - - /* Group left/right neighbors, inserting mov's where needed to - * solve conflicts: - */ - ir3_block_group(block); - - ir3_block_depth(block); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER DEPTH:\n"); - ir3_print(so->ir); - } - - ret = ir3_block_sched(block); - if (ret) { - DBG("SCHED failed!"); - goto out; - } - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER SCHED:\n"); - ir3_print(so->ir); - } - - ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face); - if (ret) { - DBG("RA failed!"); - goto out; - } - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER RA:\n"); - ir3_print(so->ir); - } - - ir3_block_legalize(block, &so->has_samp, &max_bary); - - /* fixup input/outputs: */ - for (i = 0; i < so->outputs_count; i++) { - so->outputs[i].regid = block->outputs[i*4]->regs[0]->num; - /* preserve hack for depth output.. 
tgsi writes depth to .z, - * but what we give the hw is the scalar register: - */ - if ((ctx.type == TGSI_PROCESSOR_FRAGMENT) && - (sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION)) - so->outputs[i].regid += 2; - } - /* Note that some or all channels of an input may be unused: */ - actual_in = 0; - for (i = 0; i < so->inputs_count; i++) { - unsigned j, regid = ~0, compmask = 0; - so->inputs[i].ncomp = 0; - for (j = 0; j < 4; j++) { - struct ir3_instruction *in = inputs[(i*4) + j]; - if (in) { - compmask |= (1 << j); - regid = in->regs[0]->num - j; - actual_in++; - so->inputs[i].ncomp++; - } - } - so->inputs[i].regid = regid; - so->inputs[i].compmask = compmask; - } - - /* fragment shader always gets full vec4's even if it doesn't - * fetch all components, but vertex shader we need to update - * with the actual number of components fetch, otherwise thing - * will hang due to mismaptch between VFD_DECODE's and - * TOTALATTRTOVS - */ - if (so->type == SHADER_VERTEX) - so->total_in = actual_in; - else - so->total_in = align(max_bary + 1, 4); - -out: - if (ret) { - ir3_destroy(so->ir); - so->ir = NULL; - } - compile_free(&ctx); - - return ret; -} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h index 9213386e00c..89a40b50ef3 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h @@ -31,12 +31,7 @@ #include "ir3_shader.h" - int ir3_compile_shader_nir(struct ir3_shader_variant *so, const struct tgsi_token *tokens, struct ir3_shader_key key); -int ir3_compile_shader(struct ir3_shader_variant *so, - const struct tgsi_token *tokens, - struct ir3_shader_key key, bool cp); - #endif /* IR3_COMPILER_H_ */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_flatten.c b/src/gallium/drivers/freedreno/ir3/ir3_flatten.c deleted file mode 100644 index 419cd9dfcd4..00000000000 --- a/src/gallium/drivers/freedreno/ir3/ir3_flatten.c +++ /dev/null @@ 
-1,152 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include - -#include "ir3.h" - -/* - * Flatten: flatten out legs of if/else, etc - * - * TODO probably should use some heuristic to decide to not flatten - * if one side of the other is too large / deeply nested / whatever? 
- */ - -struct ir3_flatten_ctx { - struct ir3_block *block; - unsigned cnt; -}; - -static struct ir3_register *unwrap(struct ir3_register *reg) -{ - - if (reg->flags & IR3_REG_SSA) { - struct ir3_instruction *instr = reg->instr; - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_OUTPUT: - case OPC_META_FLOW: - if (instr->regs_count > 1) - return instr->regs[1]; - return NULL; - default: - break; - } - } - } - return reg; -} - -static void ir3_instr_flatten(struct ir3_flatten_ctx *ctx, - struct ir3_instruction *instr) -{ - struct ir3_instruction *src; - - /* if we've already visited this instruction, bail now: */ - if (ir3_instr_check_mark(instr)) - return; - - instr->block = ctx->block; - - /* TODO: maybe some threshold to decide whether to - * flatten or not?? - */ - if (is_meta(instr)) { - if (instr->opc == OPC_META_PHI) { - struct ir3_register *cond, *t, *f; - - cond = unwrap(instr->regs[1]); - t = unwrap(instr->regs[2]); /* true val */ - f = unwrap(instr->regs[3]); /* false val */ - - /* must have cond, but t or f may be null if only written - * one one side of the if/else (in which case we can just - * convert the PHI to a simple move). - */ - assert(cond); - assert(t || f); - - if (t && f) { - /* convert the PHI instruction to sel.{b16,b32} */ - instr->category = 3; - - /* instruction type based on dst size: */ - if (instr->regs[0]->flags & IR3_REG_HALF) - instr->opc = OPC_SEL_B16; - else - instr->opc = OPC_SEL_B32; - - instr->regs[1] = t; - instr->regs[2] = cond; - instr->regs[3] = f; - } else { - /* convert to simple mov: */ - instr->category = 1; - instr->cat1.dst_type = TYPE_F32; - instr->cat1.src_type = TYPE_F32; - instr->regs_count = 2; - instr->regs[1] = t ? 
t : f; - } - - ctx->cnt++; - } else if ((instr->opc == OPC_META_INPUT) && - (instr->regs_count == 2)) { - type_t ftype; - - if (instr->regs[0]->flags & IR3_REG_HALF) - ftype = TYPE_F16; - else - ftype = TYPE_F32; - - /* convert meta:input to mov: */ - instr->category = 1; - instr->cat1.src_type = ftype; - instr->cat1.dst_type = ftype; - } - } - - /* recursively visit children: */ - foreach_ssa_src(src, instr) - ir3_instr_flatten(ctx, src); -} - -/* return >= 0 is # of phi's flattened, < 0 is error */ -int ir3_block_flatten(struct ir3_block *block) -{ - struct ir3_flatten_ctx ctx = { - .block = block, - }; - unsigned i; - - ir3_clear_mark(block->shader); - for(i = 0; i < block->noutputs; i++) - if (block->outputs[i]) - ir3_instr_flatten(&ctx, block->outputs[i]); - - return ctx.cnt; -} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c index 8eed083866d..d744477aad1 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_group.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c @@ -218,34 +218,32 @@ static void block_find_neighbors(struct ir3_block *block) { unsigned i; + /* shader inputs/outputs themselves must be contiguous as well: + * + * NOTE: group inputs first, since we only insert mov's + * *before* the conflicted instr (and that would go badly + * for inputs). By doing inputs first, we should never + * have a conflict on inputs.. pushing any conflict to + * resolve to the outputs, for stuff like: + * + * MOV OUT[n], IN[m].wzyx + * + * NOTE: we assume here inputs/outputs are grouped in vec4. 
+ * This logic won't quite cut it if we don't align smaller + * on vec4 boundaries + */ + for (i = 0; i < block->ninputs; i += 4) + pad_and_group_input(&block->inputs[i], 4); + for (i = 0; i < block->noutputs; i += 4) + group_n(&arr_ops_out, &block->outputs[i], 4); + + for (i = 0; i < block->noutputs; i++) { if (block->outputs[i]) { struct ir3_instruction *instr = block->outputs[i]; instr_find_neighbors(instr); } } - - /* shader inputs/outputs themselves must be contiguous as well: - */ - if (!block->parent) { - /* NOTE: group inputs first, since we only insert mov's - * *before* the conflicted instr (and that would go badly - * for inputs). By doing inputs first, we should never - * have a conflict on inputs.. pushing any conflict to - * resolve to the outputs, for stuff like: - * - * MOV OUT[n], IN[m].wzyx - * - * NOTE: we assume here inputs/outputs are grouped in vec4. - * This logic won't quite cut it if we don't align smaller - * on vec4 boundaries - */ - for (i = 0; i < block->ninputs; i += 4) - pad_and_group_input(&block->inputs[i], 4); - for (i = 0; i < block->noutputs; i += 4) - group_n(&arr_ops_out, &block->outputs[i], 4); - - } } void ir3_block_group(struct ir3_block *block) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c index 755c0c23c36..986a5bc7be0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_print.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c @@ -54,10 +54,8 @@ static void print_instr_name(struct ir3_instruction *instr) /* shouldn't hit here.. 
just for debugging: */ switch (instr->opc) { case OPC_META_INPUT: printf("_meta:in"); break; - case OPC_META_OUTPUT: printf("_meta:out"); break; case OPC_META_FO: printf("_meta:fo"); break; case OPC_META_FI: printf("_meta:fi"); break; - case OPC_META_FLOW: printf("_meta:flow"); break; default: printf("_meta:%d", instr->opc); break; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 9bf4e64c7f1..75c9cc46e88 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -146,17 +146,6 @@ assemble_variant(struct ir3_shader_variant *v) v->ir = NULL; } -/* reset before attempting to compile again.. */ -static void reset_variant(struct ir3_shader_variant *v, const char *msg) -{ - debug_error(msg); - v->inputs_count = 0; - v->outputs_count = 0; - v->total_in = 0; - v->has_samp = false; - v->immediates_count = 0; -} - static struct ir3_shader_variant * create_variant(struct ir3_shader *shader, struct ir3_shader_key key) { @@ -177,22 +166,7 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key) tgsi_dump(tokens, 0); } - if (fd_mesa_debug & FD_DBG_NIR) { - ret = ir3_compile_shader_nir(v, tokens, key); - if (ret) - reset_variant(v, "NIR compiler failed, fallback to TGSI!"); - } else { - ret = -1; - } - - if (ret) { - ret = ir3_compile_shader(v, tokens, key, true); - if (ret) { - reset_variant(v, "new compiler failed, trying without copy propagation!"); - ret = ir3_compile_shader(v, tokens, key, false); - } - } - + ret = ir3_compile_shader_nir(v, tokens, key); if (ret) { debug_error("compile failed!"); goto fail; From 7674ab12e826d2ea33f13fb2e6ca8ae2a62fe460 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 9 Jun 2015 17:42:16 -0400 Subject: [PATCH 759/834] freedreno/ir3: silence warnings Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff 
--git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index caea34c7fd4..3675f5f060a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1154,6 +1154,8 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) if (info->has_dest) { dst = get_dst(ctx, &intr->dest, intr->num_components); + } else { + dst = NULL; } switch (intr->intrinsic) { @@ -1314,6 +1316,8 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp) coords = 3; flags |= IR3_INSTR_3D; break; + default: + unreachable("bad sampler_dim"); } if (tex->is_shadow) @@ -1336,7 +1340,10 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) unsigned i, coords, flags; unsigned nsrc0 = 0, nsrc1 = 0; type_t type; - opc_t opc; + opc_t opc = 0; + + coord = off = ddx = ddy = NULL; + lod = proj = compare = NULL; /* TODO: might just be one component for gathers? */ dst = get_dst(ctx, &tex->dest, 4); @@ -1480,6 +1487,8 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) case nir_type_bool: type = TYPE_U32; break; + default: + unreachable("bad dest_type"); } sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, From 5c1e153467a50dec91df49239654017e9ed86d69 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 25 Apr 2015 16:30:55 -0400 Subject: [PATCH 760/834] freedreno/ir3: dump nocp option No longer used, or even possible, with NIR frontend. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/freedreno_screen.c | 1 - src/gallium/drivers/freedreno/freedreno_util.h | 1 - src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 6 ------ 3 files changed, 8 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index c9d82563e1c..b3b5462b437 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -69,7 +69,6 @@ static const struct debug_named_value debug_options[] = { {"nobin", FD_DBG_NOBIN, "Disable hw binning"}, {"optmsgs", FD_DBG_OPTMSGS,"Enable optimizer debug messages"}, {"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 120 (rather than 130) on a3xx+"}, - {"nocp", FD_DBG_NOCP, "Disable copy-propagation"}, DEBUG_NAMED_VALUE_END }; diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 8664e74b335..630c6013ab9 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -64,7 +64,6 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define FD_DBG_NOBIN 0x0100 #define FD_DBG_OPTMSGS 0x0400 #define FD_DBG_GLSL120 0x1000 -#define FD_DBG_NOCP 0x2000 extern int fd_mesa_debug; extern bool fd_binning_enabled; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index fb5c6513bcb..44493c33c1a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -280,12 +280,6 @@ int main(int argc, char **argv) continue; } - if (!strcmp(argv[n], "--nocp")) { - fd_mesa_debug |= FD_DBG_NOCP; - n++; - continue; - } - if (!strcmp(argv[n], "--help")) { print_usage(); return 0; From 694beb8b830c993e9bfb744655be3dbd558ab3a8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 23 May 2015 13:37:41 -0400 Subject: [PATCH 761/834] freedreno/ir3: introduce 
ir3_compiler object Right now, just provides a cleaner way to get at the gpu-id, given the separation between compiler and context. But we will need this also to hold the reg-set for new register allocation. Signed-off-by: Rob Clark --- .../drivers/freedreno/Makefile.sources | 1 + .../drivers/freedreno/a3xx/fd3_screen.c | 5 ++- .../drivers/freedreno/a4xx/fd4_screen.c | 5 ++- .../drivers/freedreno/freedreno_screen.h | 4 +- src/gallium/drivers/freedreno/ir3/ir3.c | 3 +- src/gallium/drivers/freedreno/ir3/ir3.h | 4 +- .../drivers/freedreno/ir3/ir3_cmdline.c | 6 ++- .../drivers/freedreno/ir3/ir3_compiler.c | 43 +++++++++++++++++++ .../drivers/freedreno/ir3/ir3_compiler.h | 13 +++++- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 20 +++------ .../drivers/freedreno/ir3/ir3_shader.c | 12 ++---- .../drivers/freedreno/ir3/ir3_shader.h | 5 ++- 12 files changed, 90 insertions(+), 31 deletions(-) create mode 100644 src/gallium/drivers/freedreno/ir3/ir3_compiler.c diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 6af8754c4af..baae9144005 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -121,6 +121,7 @@ ir3_SOURCES := \ ir3/instr-a3xx.h \ ir3/ir3.c \ ir3/ir3_compiler_nir.c \ + ir3/ir3_compiler.c \ ir3/ir3_compiler.h \ ir3/ir3_cp.c \ ir3/ir3_depth.c \ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c index 3497921257c..094dcf376e5 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c @@ -32,6 +32,7 @@ #include "fd3_screen.h" #include "fd3_context.h" #include "fd3_format.h" +#include "ir3_compiler.h" static boolean fd3_screen_is_format_supported(struct pipe_screen *pscreen, @@ -103,7 +104,9 @@ fd3_screen_is_format_supported(struct pipe_screen *pscreen, void fd3_screen_init(struct pipe_screen *pscreen) { - fd_screen(pscreen)->max_rts 
= 4; + struct fd_screen *screen = fd_screen(pscreen); + screen->max_rts = 4; + screen->compiler = ir3_compiler_create(screen->gpu_id); pscreen->context_create = fd3_context_create; pscreen->is_format_supported = fd3_screen_is_format_supported; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c index f5b46685bdf..e8cbb2d201a 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c @@ -32,6 +32,7 @@ #include "fd4_screen.h" #include "fd4_context.h" #include "fd4_format.h" +#include "ir3_compiler.h" static boolean fd4_screen_is_format_supported(struct pipe_screen *pscreen, @@ -100,7 +101,9 @@ fd4_screen_is_format_supported(struct pipe_screen *pscreen, void fd4_screen_init(struct pipe_screen *pscreen) { - fd_screen(pscreen)->max_rts = 1; + struct fd_screen *screen = fd_screen(pscreen); + screen->max_rts = 1; + screen->compiler = ir3_compiler_create(screen->gpu_id); pscreen->context_create = fd4_context_create; pscreen->is_format_supported = fd4_screen_is_format_supported; } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index 3b470d1d8a6..dbc2808262a 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -46,7 +46,9 @@ struct fd_screen { uint32_t device_id; uint32_t gpu_id; /* 220, 305, etc */ uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */ - uint32_t max_rts; + uint32_t max_rts; /* max # of render targets */ + + void *compiler; /* currently unused for a2xx */ struct fd_device *dev; struct fd_pipe *pipe; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index aea1b967b07..92c92e5001f 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -66,11 +66,12 @@ void * ir3_alloc(struct ir3 *shader, int sz) return ptr; } 
-struct ir3 * ir3_create(void) +struct ir3 * ir3_create(struct ir3_compiler *compiler) { struct ir3 *shader = calloc(1, sizeof(struct ir3)); grow_heap(shader); + shader->compiler = compiler; return shader; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 3c4fd2d46b0..29a6e402056 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -35,6 +35,7 @@ /* low level intermediate representation of an adreno shader program */ +struct ir3_compiler; struct ir3; struct ir3_instruction; struct ir3_block; @@ -324,6 +325,7 @@ static inline int ir3_neighbor_count(struct ir3_instruction *instr) struct ir3_heap_chunk; struct ir3 { + struct ir3_compiler *compiler; /* Track bary.f (and ldlv) instructions.. this is needed in * scheduling to ensure that all varying fetches happen before @@ -367,7 +369,7 @@ struct ir3_block { struct list_head instr_list; }; -struct ir3 * ir3_create(void); +struct ir3 * ir3_create(struct ir3_compiler *compiler); void ir3_destroy(struct ir3 *shader); void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, uint32_t gpu_id); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 44493c33c1a..3fa886131f0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -216,6 +216,7 @@ int main(int argc, char **argv) const char *filename; struct tgsi_token toks[65536]; struct tgsi_parse_context parse; + struct ir3_compiler *compiler; struct ir3_shader_variant v; struct ir3_shader_key key = {}; const char *info; @@ -319,8 +320,11 @@ int main(int argc, char **argv) break; } + /* TODO cmdline option to target different gpus: */ + compiler = ir3_compiler_create(320); + info = "NIR compiler"; - ret = ir3_compile_shader_nir(&v, toks, key); + ret = ir3_compile_shader_nir(compiler, &v, toks, key); if (ret) { fprintf(stderr, "compiler failed!\n"); 
return ret; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c new file mode 100644 index 00000000000..0087374539a --- /dev/null +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -0,0 +1,43 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2015 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "util/ralloc.h" + +#include "ir3_compiler.h" + +struct ir3_compiler * ir3_compiler_create(uint32_t gpu_id) +{ + struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler); + compiler->gpu_id = gpu_id; + return compiler; +} + +void ir3_compiler_destroy(struct ir3_compiler *compiler) +{ + ralloc_free(compiler); +} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h index 89a40b50ef3..313916f4288 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h @@ -31,7 +31,16 @@ #include "ir3_shader.h" -int ir3_compile_shader_nir(struct ir3_shader_variant *so, - const struct tgsi_token *tokens, struct ir3_shader_key key); +struct ir3_compiler { + uint32_t gpu_id; +}; + +struct ir3_compiler * ir3_compiler_create(uint32_t gpu_id); +void ir3_compiler_destroy(struct ir3_compiler *compiler); + +int ir3_compile_shader_nir(struct ir3_compiler *compiler, + struct ir3_shader_variant *so, + const struct tgsi_token *tokens, + struct ir3_shader_key key); #endif /* IR3_COMPILER_H_ */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 3675f5f060a..9bc54c9b83b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -192,11 +192,7 @@ lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so) break; } - if (!so->shader) { - /* hack for standalone compiler which does not have - * screen/context: - */ - } else if (ir3_shader_gpuid(so->shader) >= 400) { + if (so->ir->compiler->gpu_id >= 400) { /* a4xx seems to have *no* sam.p */ lconfig.lower_TXP = ~0; /* lower all txp */ } else { @@ -214,11 +210,7 @@ compile_init(struct ir3_shader_variant *so, struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile); const struct tgsi_token *lowered_tokens; - if 
(!so->shader) { - /* hack for standalone compiler which does not have - * screen/context: - */ - } else if (ir3_shader_gpuid(so->shader) >= 400) { + if (so->ir->compiler->gpu_id >= 400) { /* need special handling for "flat" */ ctx->flat_bypass = true; ctx->levels_add_one = false; @@ -1919,8 +1911,10 @@ fixup_frag_inputs(struct ir3_compile *ctx) } int -ir3_compile_shader_nir(struct ir3_shader_variant *so, - const struct tgsi_token *tokens, struct ir3_shader_key key) +ir3_compile_shader_nir(struct ir3_compiler *compiler, + struct ir3_shader_variant *so, + const struct tgsi_token *tokens, + struct ir3_shader_key key) { struct ir3_compile *ctx; struct ir3_block *block; @@ -1930,7 +1924,7 @@ ir3_compile_shader_nir(struct ir3_shader_variant *so, assert(!so->ir); - so->ir = ir3_create(); + so->ir = ir3_create(compiler); assert(so->ir); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 75c9cc46e88..b5b038100cc 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -127,7 +127,7 @@ static void assemble_variant(struct ir3_shader_variant *v) { struct fd_context *ctx = fd_context(v->shader->pctx); - uint32_t gpu_id = ir3_shader_gpuid(v->shader); + uint32_t gpu_id = v->shader->compiler->gpu_id; uint32_t sz, *bin; bin = ir3_shader_assemble(v, gpu_id); @@ -166,7 +166,7 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key) tgsi_dump(tokens, 0); } - ret = ir3_compile_shader_nir(v, tokens, key); + ret = ir3_compile_shader_nir(shader->compiler, v, tokens, key); if (ret) { debug_error("compile failed!"); goto fail; @@ -191,13 +191,6 @@ fail: return NULL; } -uint32_t -ir3_shader_gpuid(struct ir3_shader *shader) -{ - struct fd_context *ctx = fd_context(shader->pctx); - return ctx->screen->gpu_id; -} - struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key) { @@ -260,6 +253,7 @@ 
ir3_shader_create(struct pipe_context *pctx, const struct tgsi_token *tokens, enum shader_t type) { struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader); + shader->compiler = fd_context(pctx)->screen->compiler; shader->pctx = pctx; shader->type = type; shader->tokens = tgsi_dup_tokens(tokens); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index e5410bf88b2..8141c5698db 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -196,6 +196,8 @@ struct ir3_shader_variant { struct ir3_shader { enum shader_t type; + struct ir3_compiler *compiler; + struct pipe_context *pctx; const struct tgsi_token *tokens; @@ -212,7 +214,6 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id); struct ir3_shader * ir3_shader_create(struct pipe_context *pctx, const struct tgsi_token *tokens, enum shader_t type); void ir3_shader_destroy(struct ir3_shader *shader); -uint32_t ir3_shader_gpuid(struct ir3_shader *shader); struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key); @@ -220,6 +221,8 @@ struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, * Helper/util: */ +#include "pipe/p_shader_tokens.h" + static inline int ir3_find_output(const struct ir3_shader_variant *so, ir3_semantic semantic) { From d52fb2f5ad828f879286b9068023b82b9897bc17 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 May 2015 12:21:12 -0400 Subject: [PATCH 762/834] freedreno/ir3/ra: use register_allocate Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.h | 35 +- .../drivers/freedreno/ir3/ir3_compiler.c | 1 + .../drivers/freedreno/ir3/ir3_compiler.h | 3 + .../drivers/freedreno/ir3/ir3_compiler_nir.c | 17 +- src/gallium/drivers/freedreno/ir3/ir3_cp.c | 2 +- src/gallium/drivers/freedreno/ir3/ir3_ra.c | 1065 +++++++++-------- 6 files changed, 616 insertions(+), 507 deletions(-) diff --git 
a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 29a6e402056..93a6ab5da7c 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -83,7 +83,6 @@ struct ir3_register { */ IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */ IR3_REG_IA = 0x4000, /* meta-input dst is "assigned" */ - IR3_REG_ADDR = 0x8000, /* register is a0.x */ } flags; union { /* normal registers: @@ -245,6 +244,13 @@ struct ir3_instruction { */ #define DEPTH_UNUSED ~0 unsigned depth; + /* When we get to the RA stage, we no longer need depth, but + * we do need instruction's position/name: + */ + struct { + uint16_t ip; + uint16_t name; + }; }; /* Used during CP and RA stages. For fanin and shader inputs/ @@ -503,6 +509,28 @@ static inline bool is_mem(struct ir3_instruction *instr) return (instr->category == 6); } +static inline bool +is_store(struct ir3_instruction *instr) +{ + if (is_mem(instr)) { + /* these instructions, the "destination" register is + * actually a source, the address to store to. 
+ */ + switch (instr->opc) { + case OPC_STG: + case OPC_STP: + case OPC_STL: + case OPC_STLW: + case OPC_L2G: + case OPC_G2L: + return true; + default: + break; + } + } + return false; +} + static inline bool is_input(struct ir3_instruction *instr) { /* in some cases, ldlv is used to fetch varying without @@ -527,7 +555,7 @@ static inline bool writes_addr(struct ir3_instruction *instr) { if (instr->regs_count > 0) { struct ir3_register *dst = instr->regs[0]; - return !!(dst->flags & IR3_REG_ADDR); + return reg_num(dst) == REG_A0; } return false; } @@ -558,7 +586,7 @@ static inline bool conflicts(struct ir3_instruction *a, static inline bool reg_gpr(struct ir3_register *r) { - if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR)) + if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) return false; if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) return false; @@ -771,6 +799,7 @@ void ir3_block_group(struct ir3_block *block); int ir3_block_sched(struct ir3_block *block); /* register assignment: */ +struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(void *memctx); int ir3_block_ra(struct ir3_block *block, enum shader_t type, bool frag_coord, bool frag_face); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index 0087374539a..7c8eccb54e1 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -34,6 +34,7 @@ struct ir3_compiler * ir3_compiler_create(uint32_t gpu_id) { struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler); compiler->gpu_id = gpu_id; + compiler->set = ir3_ra_alloc_reg_set(compiler); return compiler; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h index 313916f4288..86b1161d9cb 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h @@ -31,8 +31,11 @@ #include "ir3_shader.h" +struct 
ir3_ra_reg_set; + struct ir3_compiler { uint32_t gpu_id; + struct ir3_ra_reg_set *set; }; struct ir3_compiler * ir3_compiler_create(uint32_t gpu_id); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 9bc54c9b83b..39f4527c22b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -385,7 +385,8 @@ create_addr(struct ir3_block *block, struct ir3_instruction *src) instr->regs[1]->flags |= IR3_REG_HALF; instr = ir3_MOV(block, instr, TYPE_S16); - instr->regs[0]->flags |= IR3_REG_ADDR | IR3_REG_HALF; + instr->regs[0]->num = regid(REG_A0, 0); + instr->regs[0]->flags |= IR3_REG_HALF; instr->regs[1]->flags |= IR3_REG_HALF; return instr; @@ -589,6 +590,7 @@ create_frag_face(struct ir3_compile *ctx, unsigned comp) compile_assert(ctx, !ctx->frag_face); ctx->frag_face = create_input(block, NULL, 0); + ctx->frag_face->regs[0]->flags |= IR3_REG_HALF; /* for faceness, we always get -1 or 0 (int).. but TGSI expects * positive vs negative float.. and piglit further seems to @@ -1981,9 +1983,18 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, */ if (key.half_precision) { for (i = 0; i < block->noutputs; i++) { - if (!block->outputs[i]) + struct ir3_instruction *out = block->outputs[i]; + if (!out) continue; - block->outputs[i]->regs[0]->flags |= IR3_REG_HALF; + out->regs[0]->flags |= IR3_REG_HALF; + /* output could be a fanout (ie. 
texture fetch output) + * in which case we need to propagate the half-reg flag + * up to the definer so that RA sees it: + */ + if (is_meta(out) && (out->opc == OPC_META_FO)) { + out = out->regs[1]->instr; + out->regs[0]->flags |= IR3_REG_HALF; + } } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 350f7dd5e6b..8c057166f32 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -41,7 +41,7 @@ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) struct ir3_register *dst = instr->regs[0]; struct ir3_register *src = instr->regs[1]; struct ir3_instruction *src_instr = ssa(src); - if (dst->flags & (IR3_REG_ADDR | IR3_REG_RELATIV)) + if (dst->flags & IR3_REG_RELATIV) return false; if (src->flags & IR3_REG_RELATIV) return false; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 95f6a81861e..39ce9c5d4ce 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -26,267 +26,533 @@ * Rob Clark */ -#include "pipe/p_shader_tokens.h" #include "util/u_math.h" +#include "util/register_allocate.h" +#include "util/ralloc.h" #include "ir3.h" +#include "ir3_compiler.h" /* * Register Assignment: * - * NOTE: currently only works on a single basic block.. need to think - * about how multiple basic blocks are going to get scheduled. But - * I think I want to re-arrange how blocks work, ie. get rid of the - * block nesting thing.. + * Uses the register_allocate util, which implements graph coloring + * algo with interference classes. To handle the cases where we need + * consecutive registers (for example, texture sample instructions), + * we model these as larger (double/quad/etc) registers which conflict + * with the corresponding registers in other classes. * - * NOTE: we could do register coalescing (eliminate moves) as part of - * the RA step.. 
OTOH I think we need to do scheduling before register - * assignment. And if we remove a mov that effects scheduling (unless - * we leave a placeholder nop, which seems lame), so I'm not really - * sure how practical this is to do both in a single stage. But OTOH - * I'm not really sure a sane way for the CP stage to realize when it - * cannot remove a mov due to multi-register constraints.. + * Additionally we create additional classes for half-regs, which + * do not conflict with the full-reg classes. We do need at least + * sizes 1-4 (to deal w/ texture sample instructions output to half- + * reg). At the moment we don't create the higher order half-reg + * classes as half-reg frequently does not have enough precision + * for texture coords at higher resolutions. * - * NOTE: http://scopesconf.org/scopes-01/paper/session1_2.ps.gz has - * some ideas to handle array allocation with a more conventional - * graph coloring algorithm for register assignment, which might be - * a good alternative to the current algo. However afaict it cannot - * handle overlapping arrays, which is a scenario that we have to - * deal with + * There are some additional cases that we need to handle specially, + * as the graph coloring algo doesn't understand "partial writes". + * For example, a sequence like: + * + * add r0.z, ... + * sam (f32)(xy)r0.x, ... + * ... + * sam (f32)(xyzw)r0.w, r0.x, ... ; 3d texture, so r0.xyz are coord + * + * In this scenario, we treat r0.xyz as class size 3, which is written + * (from a use/def perspective) at the 'add' instruction and ignore the + * subsequent partial writes to r0.xy. So the 'add r0.z, ...' is the + * defining instruction, as it is the first to partially write r0.xyz. + * + * Note i965 has a similar scenario, which they solve with a virtual + * LOAD_PAYLOAD instruction which gets turned into multiple MOV's after + * register assignment. But for us that is horrible from a scheduling + * standpoint. 
Instead what we do is use idea of 'definer' instruction. + * Ie. the first instruction (lowest ip) to write to the array is the + * one we consider from use/def perspective when building interference + * graph. (Other instructions which write other array elements just + * define the variable some more.) */ -struct ir3_ra_ctx { - struct ir3_block *block; - enum shader_t type; - bool frag_coord; - bool frag_face; - int cnt; - bool error; - struct { - unsigned base; - unsigned size; - } arrays[MAX_ARRAYS]; +static const unsigned class_sizes[] = { + 1, 2, 3, 4, + 4 + 4, /* txd + 1d/2d */ + 4 + 6, /* txd + 3d */ + /* temporary: until we can assign arrays, create classes so we + * can round up array to fit. NOTE with tgsi arrays should + * really all be multiples of four: + */ + 4 * 4, + 4 * 8, + 4 * 16, + 4 * 32, + +}; +#define class_count ARRAY_SIZE(class_sizes) + +static const unsigned half_class_sizes[] = { + 1, 2, 3, 4, +}; +#define half_class_count ARRAY_SIZE(half_class_sizes) +#define total_class_count (class_count + half_class_count) + +/* Below a0.x are normal regs. RA doesn't need to assign a0.x/p0.x. 
*/ +#define NUM_REGS (4 * (REG_A0 - 1)) +/* Number of virtual regs in a given class: */ +#define CLASS_REGS(i) (NUM_REGS - (class_sizes[i] - 1)) +#define HALF_CLASS_REGS(i) (NUM_REGS - (half_class_sizes[i] - 1)) + +/* register-set, created one time, used for all shaders: */ +struct ir3_ra_reg_set { + struct ra_regs *regs; + unsigned int classes[class_count]; + unsigned int half_classes[half_class_count]; + /* maps flat virtual register space to base gpr: */ + uint16_t *ra_reg_to_gpr; + /* maps cls,gpr to flat virtual register space: */ + uint16_t **gpr_to_ra_reg; }; -#ifdef DEBUG -# include "freedreno_util.h" -# define ra_debug (fd_mesa_debug & FD_DBG_OPTMSGS) -#else -# define ra_debug 0 -#endif - -#define ra_dump_list(msg, ir) do { \ - if (ra_debug) { \ - debug_printf("-- " msg); \ - ir3_print(ir); \ - } \ - } while (0) - -#define ra_dump_instr(msg, n) do { \ - if (ra_debug) { \ - debug_printf(">> " msg); \ - ir3_print_instr(n); \ - } \ - } while (0) - -#define ra_assert(ctx, x) do { \ - debug_assert(x); \ - if (!(x)) { \ - debug_printf("RA: failed assert: %s\n", #x); \ - (ctx)->error = true; \ - }; \ - } while (0) - - -/* sorta ugly way to retrofit half-precision support.. rather than - * passing extra param around, just OR in a high bit. All the low - * value arithmetic (ie. +/- offset within a contiguous vec4, etc) - * will continue to work as long as you don't underflow (and that - * would go badly anyways). +/* One-time setup of RA register-set, which describes all the possible + * "virtual" registers and their interferences. Ie. double register + * occupies (and conflicts with) two single registers, and so forth. + * Since registers do not need to be aligned to their class size, they + * can conflict with other registers in the same class too. Ie: + * + * Single (base) | Double + * --------------+--------------- + * R0 | D0 + * R1 | D0 D1 + * R2 | D1 D2 + * R3 | D2 + * .. and so on.. 
+ * + * (NOTE the disassembler uses notation like r0.x/y/z/w but those are + * really just four scalar registers. Don't let that confuse you.) */ -#define REG_HALF 0x8000 - -#define REG(n, wm, f) (struct ir3_register){ \ - .flags = (f), \ - .num = (n), \ - .wrmask = TGSI_WRITEMASK_ ## wm, \ - } - -/* check that the register exists, is a GPR and is not special (a0/p0) */ -static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n) +struct ir3_ra_reg_set * +ir3_ra_alloc_reg_set(void *memctx) { - if ((n < instr->regs_count) && reg_gpr(instr->regs[n]) && - !(instr->regs[n]->flags & IR3_REG_SSA)) - return instr->regs[n]; - return NULL; -} + struct ir3_ra_reg_set *set = rzalloc(memctx, struct ir3_ra_reg_set); + unsigned ra_reg_count, reg, first_half_reg; + unsigned int **q_values; -/* figure out if an unassigned src register points back to the instr we - * are assigning: - */ -static bool instr_used_by(struct ir3_instruction *instr, - struct ir3_register *src) -{ - struct ir3_instruction *src_instr = ssa(src); - unsigned i; - if (instr == src_instr) - return true; - if (src_instr && is_meta(src_instr)) - for (i = 1; i < src_instr->regs_count; i++) - if (instr_used_by(instr, src_instr->regs[i])) - return true; + /* calculate # of regs across all classes: */ + ra_reg_count = 0; + for (unsigned i = 0; i < class_count; i++) + ra_reg_count += CLASS_REGS(i); + for (unsigned i = 0; i < half_class_count; i++) + ra_reg_count += HALF_CLASS_REGS(i); - return false; -} + /* allocate and populate q_values: */ + q_values = ralloc_array(set, unsigned *, total_class_count); + for (unsigned i = 0; i < class_count; i++) { + q_values[i] = rzalloc_array(q_values, unsigned, total_class_count); -static bool instr_is_output(struct ir3_instruction *instr) -{ - struct ir3_block *block = instr->block; - unsigned i; - - for (i = 0; i < block->noutputs; i++) - if (instr == block->outputs[i]) - return true; - - return false; -} - -static void mark_sources(struct ir3_instruction 
*instr, - struct ir3_instruction *n, regmask_t *liveregs, regmask_t *written) -{ - unsigned i; - - for (i = 1; i < n->regs_count; i++) { - struct ir3_register *r = reg_check(n, i); - if (r) - regmask_set_if_not(liveregs, r, written); - - /* if any src points back to the instruction(s) in - * the block of neighbors that we are assigning then - * mark any written (clobbered) registers as live: + /* From register_allocate.c: + * + * q(B,C) (indexed by C, B is this register class) in + * Runeson/Nyström paper. This is "how many registers of B could + * the worst choice register from C conflict with". + * + * If we just let the register allocation algorithm compute these + * values, it is extremely expensive. However, since all of our + * registers are laid out, we can very easily compute them + * ourselves. View the register from C as fixed starting at GRF n + * somewhere in the middle, and the register from B as sliding back + * and forth. Then the first register to conflict from B is the + * one starting at n - class_size[B] + 1 and the last register to + * conflict will start at n + class_size[C] - 1. Therefore, the + * number of conflicts from B is class_size[B] + class_size[C] - 1. 
+ * + * +-+-+-+-+-+-+ +-+-+-+-+-+-+ + * B | | | | | |n| --> | | | | | | | + * +-+-+-+-+-+-+ +-+-+-+-+-+-+ + * +-+-+-+-+-+ + * C |n| | | | | + * +-+-+-+-+-+ + * + * (Idea copied from brw_fs_reg_allocate.cpp) */ - if (instr_used_by(instr, n->regs[i])) - regmask_or(liveregs, liveregs, written); + for (unsigned j = 0; j < class_count; j++) + q_values[i][j] = class_sizes[i] + class_sizes[j] - 1; } -} + for (unsigned i = class_count; i < total_class_count; i++) { + q_values[i] = ralloc_array(q_values, unsigned, total_class_count); -/* live means read before written */ -static void compute_liveregs(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr, regmask_t *liveregs) -{ - struct ir3_block *block = ctx->block; - regmask_t written; - unsigned i; - - regmask_init(&written); - - list_for_each_entry (struct ir3_instruction, n, &instr->node, node) { - struct ir3_register *r; - - if (is_meta(n)) - continue; - - /* check first src's read: */ - mark_sources(instr, n, liveregs, &written); - - /* for instructions that write to an array, we need to - * capture the dependency on the array elements: - */ - if (n->fanin) - mark_sources(instr, n->fanin, liveregs, &written); - - /* meta-instructions don't actually get scheduled, - * so don't let it's write confuse us.. what we - * really care about is when the src to the meta - * instr was written: - */ - if (is_meta(n)) - continue; - - /* then dst written (if assigned already): */ - r = reg_check(n, 0); - if (r) { - /* if an instruction *is* an output, then it is live */ - if (!instr_is_output(n)) - regmask_set(&written, r); - } - - } - - /* be sure to account for output registers too: */ - for (i = 0; i < block->noutputs; i++) { - struct ir3_register *r; - if (!block->outputs[i]) - continue; - r = reg_check(block->outputs[i], 0); - if (r) - regmask_set_if_not(liveregs, r, &written); - } - - /* if instruction is output, we need a reg that isn't written - * before the end.. 
equiv to the instr_used_by() check above - * in the loop body - * TODO maybe should follow fanin/fanout? - */ - if (instr_is_output(instr)) - regmask_or(liveregs, liveregs, &written); -} - -static int find_available(regmask_t *liveregs, int size, bool half) -{ - unsigned i; - unsigned f = half ? IR3_REG_HALF : 0; - for (i = 0; i < MAX_REG - size; i++) { - if (!regmask_get(liveregs, &REG(i, X, f))) { - unsigned start = i++; - for (; (i < MAX_REG) && ((i - start) < size); i++) - if (regmask_get(liveregs, &REG(i, X, f))) - break; - if ((i - start) >= size) - return start; + /* see comment above: */ + for (unsigned j = class_count; j < total_class_count; j++) { + q_values[i][j] = half_class_sizes[i - class_count] + + half_class_sizes[j - class_count] - 1; } } - assert(0); + + /* allocate the reg-set.. */ + set->regs = ra_alloc_reg_set(set, ra_reg_count); + set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count); + set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count); + + /* .. 
and classes */ + reg = 0; + for (unsigned i = 0; i < class_count; i++) { + set->classes[i] = ra_alloc_reg_class(set->regs); + + set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i)); + + for (unsigned j = 0; j < CLASS_REGS(i); j++) { + ra_class_add_reg(set->regs, set->classes[i], reg); + + set->ra_reg_to_gpr[reg] = j; + set->gpr_to_ra_reg[i][j] = reg; + + for (unsigned br = j; br < j + class_sizes[i]; br++) + ra_add_transitive_reg_conflict(set->regs, br, reg); + + reg++; + } + } + + first_half_reg = reg; + + for (unsigned i = 0; i < half_class_count; i++) { + set->half_classes[i] = ra_alloc_reg_class(set->regs); + + set->gpr_to_ra_reg[class_count + i] = + ralloc_array(set, uint16_t, CLASS_REGS(i)); + + for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) { + ra_class_add_reg(set->regs, set->half_classes[i], reg); + + set->ra_reg_to_gpr[reg] = j; + set->gpr_to_ra_reg[class_count + i][j] = reg; + + for (unsigned br = j; br < j + half_class_sizes[i]; br++) + ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg); + + reg++; + } + } + + ra_set_finalize(set->regs, q_values); + + ralloc_free(q_values); + + return set; +} + +/* register-assign context, per-shader */ +struct ir3_ra_ctx { + struct ir3 *ir; + enum shader_t type; + bool frag_face; + + struct ir3_ra_reg_set *set; + struct ra_graph *g; + unsigned alloc_count; + unsigned class_alloc_count[total_class_count]; + unsigned class_base[total_class_count]; + unsigned instr_cnt; + unsigned *def, *use; /* def/use table */ +}; + +static bool +is_half(struct ir3_instruction *instr) +{ + return !!(instr->regs[0]->flags & IR3_REG_HALF); +} + +static int +size_to_class(unsigned sz, bool half) +{ + if (half) { + for (unsigned i = 0; i < half_class_count; i++) + if (half_class_sizes[i] >= sz) + return i + class_count; + } else { + for (unsigned i = 0; i < class_count; i++) + if (class_sizes[i] >= sz) + return i; + } + debug_assert(0); return -1; } -static int alloc_block(struct ir3_ra_ctx *ctx, - struct 
ir3_instruction *instr, int size) +static bool +is_temp(struct ir3_register *reg) { - struct ir3_register *dst = instr->regs[0]; - struct ir3_instruction *n; - regmask_t liveregs; - unsigned name; + if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)) + return false; + if (reg->flags & IR3_REG_RELATIV) // TODO + return false; + if ((reg->num == regid(REG_A0, 0)) || + (reg->num == regid(REG_P0, 0))) + return false; + return true; +} - /* should only ever be called w/ head of neighbor list: */ - debug_assert(!instr->cp.left); +static bool +writes_gpr(struct ir3_instruction *instr) +{ + if (is_store(instr)) + return false; + /* is dest a normal temp register: */ + return is_temp(instr->regs[0]); +} - regmask_init(&liveregs); +static struct ir3_instruction * +get_definer(struct ir3_instruction *instr, int *sz, int *off) +{ + struct ir3_instruction *d = NULL; + if (is_meta(instr) && (instr->opc == OPC_META_FI)) { + /* What about the case where collect is subset of array, we + * need to find the distance between where actual array starts + * and fanin.. that probably doesn't happen currently. + */ + struct ir3_register *src; - for (n = instr; n; n = n->cp.right) - compute_liveregs(ctx, n, &liveregs); + /* note: don't use foreach_ssa_src as this gets called once + * while assigning regs (which clears SSA flag) + */ + foreach_src(src, instr) { + if (!src->instr) + continue; + if ((!d) || (src->instr->ip < d->ip)) + d = src->instr; + } - /* because we do assignment on fanout nodes for wrmask!=0x1, we - * need to handle this special case, where the fanout nodes all - * appear after one or more of the consumers of the src node: - * - * 0098:009: sam _, r2.x - * 0028:010: mul.f r3.z, r4.x, c13.x - * ; we start assigning here for '0098:009: sam'.. 
but - * ; would miss the usage at '0028:010: mul.f' - * 0101:009: _meta:fo _, _[0098:009: sam], off=2 + *sz = instr->regs_count - 1; + *off = 0; + + } else if (instr->cp.right || instr->cp.left) { + /* covers also the meta:fo case, which ends up w/ single + * scalar instructions for each component: + */ + struct ir3_instruction *f = ir3_neighbor_first(instr); + + /* by definition, the entire sequence forms one linked list + * of single scalar register nodes (even if some of them may + * be fanouts from a texture sample (for example) instr. We + * just need to walk the list finding the first element of + * the group defined (lowest ip) + */ + int cnt = 0; + + d = f; + while (f) { + if (f->ip < d->ip) + d = f; + if (f == instr) + *off = cnt; + f = f->cp.right; + cnt++; + } + + *sz = cnt; + + } else { + /* second case is looking directly at the instruction which + * produces multiple values (eg, texture sample), rather + * than the fanout nodes that point back to that instruction. + * This isn't quite right, because it may be part of a larger + * group, such as: + * + * sam (f32)(xyzw)r0.x, ... + * add r1.x, ... + * add r1.y, ... + * sam (f32)(xyzw)r2.x, r0.w <-- (r0.w, r1.x, r1.y) + * + * need to come up with a better way to handle that case. 
+ */ + if (instr->address) { + *sz = instr->regs[0]->size; + } else { + *sz = util_last_bit(instr->regs[0]->wrmask); + } + *off = 0; + return instr; + } + + if (is_meta(d) && (d->opc == OPC_META_FO)) { + struct ir3_instruction *dd; + int dsz, doff; + + dd = get_definer(d->regs[1]->instr, &dsz, &doff); + + /* by definition, should come before: */ + debug_assert(dd->ip < d->ip); + + *sz = MAX2(*sz, dsz); + + d = dd; + } + + return d; +} + +/* give each instruction a name (and ip), and count up the # of names + * of each class + */ +static void +ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + instr->ip = ctx->instr_cnt++; + } + + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_instruction *defn; + int cls, sz, off; + + if (instr->regs_count == 0) + continue; + + if (!writes_gpr(instr)) + continue; + + defn = get_definer(instr, &sz, &off); + + if (defn != instr) + continue; + + /* arrays which don't fit in one of the pre-defined class + * sizes are pre-colored: + * + * TODO but we still need to allocate names for them, don't we?? + */ + cls = size_to_class(sz, is_half(defn)); + if (cls >= 0) { + instr->name = ctx->class_alloc_count[cls]++; + ctx->alloc_count++; + } + } +} + +static void +ra_init(struct ir3_ra_ctx *ctx) +{ + ir3_clear_mark(ctx->ir); + + ra_block_name_instructions(ctx, ctx->ir->block); + + /* figure out the base register name for each class. 
The + * actual ra name is class_base[cls] + instr->name; */ - if (is_meta(instr) && (instr->opc == OPC_META_FO)) - compute_liveregs(ctx, instr->regs[1]->instr, &liveregs); + ctx->class_base[0] = 0; + for (unsigned i = 1; i < total_class_count; i++) { + ctx->class_base[i] = ctx->class_base[i-1] + + ctx->class_alloc_count[i-1]; + } - name = find_available(&liveregs, size, - !!(dst->flags & IR3_REG_HALF)); + ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count); + ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); + ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); +} - if (dst->flags & IR3_REG_HALF) - name |= REG_HALF; +static void +ra_destroy(struct ir3_ra_ctx *ctx) +{ + ralloc_free(ctx->g); +} - return name; +static void +ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_instruction *src; + + if (instr->regs_count == 0) + continue; + + /* There are a couple special cases to deal with here: + * + * fanout: used to split values from a higher class to a lower + * class, for example split the results of a texture fetch + * into individual scalar values; We skip over these from + * a 'def' perspective, and for a 'use' we walk the chain + * up to the defining instruction. + * + * fanin: used to collect values from lower class and assemble + * them together into a higher class, for example arguments + * to texture sample instructions; We consider these to be + * defined at the fanin node. + * + * In either case, we trace the instruction back to the original + * definer and consider that as the def/use ip. 
+ */ + + if (writes_gpr(instr)) { + struct ir3_instruction *defn; + int cls, sz, off; + + defn = get_definer(instr, &sz, &off); + if (defn == instr) { + /* arrays which don't fit in one of the pre-defined class + * sizes are pre-colored: + */ + cls = size_to_class(sz, is_half(defn)); + if (cls >= 0) { + unsigned name = ctx->class_base[cls] + defn->name; + ctx->def[name] = defn->ip; + ctx->use[name] = defn->ip; + + debug_assert(name < ctx->alloc_count); + + if (is_half(defn)) { + ra_set_node_class(ctx->g, name, + ctx->set->half_classes[cls - class_count]); + } else { + ra_set_node_class(ctx->g, name, + ctx->set->classes[cls]); + } + } + } + } + + foreach_ssa_src(src, instr) { + if (writes_gpr(src)) { + struct ir3_instruction *srcdefn; + int cls, sz, off; + + srcdefn = get_definer(src, &sz, &off); + cls = size_to_class(sz, is_half(srcdefn)); + if (cls >= 0) { + unsigned name = ctx->class_base[cls] + srcdefn->name; + ctx->use[name] = instr->ip; + } + } + } + } +} + +static void +ra_add_interference(struct ir3_ra_ctx *ctx) +{ + struct ir3_block *block = ctx->ir->block; + + ra_block_compute_live_ranges(ctx, ctx->ir->block); + + /* need to fix things up to keep outputs live: */ + for (unsigned i = 0; i < block->noutputs; i++) { + struct ir3_instruction *instr = block->outputs[i]; + struct ir3_instruction *defn; + int cls, sz, off; + + defn = get_definer(instr, &sz, &off); + cls = size_to_class(sz, is_half(defn)); + if (cls >= 0) { + unsigned name = ctx->class_base[cls] + defn->name; + ctx->use[name] = ctx->instr_cnt; + } + } + + for (unsigned i = 0; i < ctx->alloc_count; i++) { + for (unsigned j = 0; j < ctx->alloc_count; j++) { + if (!((ctx->def[i] >= ctx->use[j]) || + (ctx->def[j] >= ctx->use[i]))) { + ra_add_node_interference(ctx->g, i, j); + } + } + } } static type_t half_type(type_t type) @@ -357,324 +623,123 @@ static void fixup_half_instr_src(struct ir3_instruction *instr) } } -static void reg_assign(struct ir3_instruction *instr, - unsigned r, unsigned name) 
+static void +reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg, + struct ir3_instruction *instr) { - struct ir3_register *reg = instr->regs[r]; + struct ir3_instruction *defn; + int cls, sz, off; - reg->flags &= ~IR3_REG_SSA; - reg->num = name & ~REG_HALF; + defn = get_definer(instr, &sz, &off); + cls = size_to_class(sz, is_half(defn)); + if (cls >= 0) { + unsigned name = ctx->class_base[cls] + defn->name; + unsigned r = ra_get_node_reg(ctx->g, name); + unsigned num = ctx->set->ra_reg_to_gpr[r] + off; - if (name & REG_HALF) { - reg->flags |= IR3_REG_HALF; - /* if dst reg being assigned, patch up the instr: */ - if (reg == instr->regs[0]) - fixup_half_instr_dst(instr); - else - fixup_half_instr_src(instr); + if (reg->flags & IR3_REG_RELATIV) + num += reg->offset; + + reg->num = num; + reg->flags &= ~IR3_REG_SSA; + + if (is_half(defn)) + reg->flags |= IR3_REG_HALF; } } -static void instr_assign(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr, unsigned name); - -static void instr_assign_src(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr, unsigned r, unsigned name) +static void +ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block) { - struct ir3_register *reg = instr->regs[r]; + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_register *reg; - if (reg->flags & IR3_REG_RELATIV) - name += reg->offset; + if (instr->regs_count == 0) + continue; - reg_assign(instr, r, name); - - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_INPUT: - /* shader-input does not have a src, only block input: */ - debug_assert(instr->regs_count == 2); - instr_assign(ctx, instr, name); - return; - case OPC_META_FO: - instr_assign(ctx, instr, name + instr->fo.off); - return; - case OPC_META_FI: - instr_assign(ctx, instr, name - (r - 1)); - return; - default: - break; + if (writes_gpr(instr)) { + reg_assign(ctx, instr->regs[0], instr); + if (instr->regs[0]->flags & IR3_REG_HALF) + 
fixup_half_instr_dst(instr); } - } -} -static void instr_assign_srcs(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr, unsigned name) -{ - list_for_each_entry (struct ir3_instruction, n, &instr->node, node) { - struct ir3_instruction *src; - foreach_ssa_src_n(src, i, n) { - unsigned r = i + 1; - - /* skip address / etc (non real sources): */ - if (r >= n->regs_count) + foreach_src_n(reg, n, instr) { + struct ir3_instruction *src = reg->instr; + if (!src) continue; - if (src == instr) - instr_assign_src(ctx, n, r, name); + reg_assign(ctx, instr->regs[n+1], src); + if (instr->regs[n+1]->flags & IR3_REG_HALF) + fixup_half_instr_src(instr); } - if (ctx->error) - break; } } -static void instr_assign(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr, unsigned name) -{ - struct ir3_register *reg = instr->regs[0]; - - if (reg->flags & IR3_REG_RELATIV) - return; - - /* check if already assigned: */ - if (!(reg->flags & IR3_REG_SSA)) { - /* ... and if so, sanity check: */ - ra_assert(ctx, reg->num == (name & ~REG_HALF)); - return; - } - - /* rename this instructions dst register: */ - reg_assign(instr, 0, name); - - /* and rename any subsequent use of result of this instr: */ - instr_assign_srcs(ctx, instr, name); - - /* To simplify the neighbor logic, and to "avoid" dealing with - * instructions which write more than one output, we actually - * do register assignment for instructions that produce multiple - * outputs on the fanout nodes and propagate up the assignment - * to the actual instruction: - */ - if (is_meta(instr) && (instr->opc == OPC_META_FO)) { - struct ir3_instruction *src; - - debug_assert(name >= instr->fo.off); - - foreach_ssa_src(src, instr) - instr_assign(ctx, src, name - instr->fo.off); - } -} - -/* check neighbor list to see if it is already partially (or completely) - * assigned, in which case register block is already allocated and we - * just need to complete the assignment: - */ -static int check_partial_assignment(struct ir3_ra_ctx 
*ctx, - struct ir3_instruction *instr) -{ - struct ir3_instruction *n; - int off = 0; - - debug_assert(!instr->cp.left); - - for (n = instr; n; n = n->cp.right) { - struct ir3_register *dst = n->regs[0]; - if ((n->depth != DEPTH_UNUSED) && - !(dst->flags & IR3_REG_SSA)) { - int name = dst->num - off; - debug_assert(name >= 0); - return name; - } - off++; - } - - return -1; -} - -/* allocate register name(s) for a list of neighboring instructions; - * instr should point to leftmost neighbor (head of list) - */ -static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr) -{ - struct ir3_instruction *n; - struct ir3_register *dst; - int name; - - debug_assert(!instr->cp.left); - - if (instr->regs_count == 0) - return; - - dst = instr->regs[0]; - - /* For indirect dst, take the register assignment from the - * fanin and propagate it forward. - */ - if (dst->flags & IR3_REG_RELATIV) { - /* NOTE can be grouped, if for example outputs: - * for now disable cp if indirect writes - */ - instr_alloc_and_assign(ctx, instr->fanin); - - dst->num += instr->fanin->regs[0]->num; - dst->flags &= ~IR3_REG_SSA; - - instr_assign_srcs(ctx, instr, instr->fanin->regs[0]->num); - - return; - } - - /* for instructions w/ fanouts, do the actual register assignment - * on the group of fanout neighbor nodes and propagate the reg - * name back up to the texture instruction. 
- */ - if (dst->wrmask != 0x1) - return; - - name = check_partial_assignment(ctx, instr); - - /* allocate register(s): */ - if (name >= 0) { - /* already partially assigned, just finish the job */ - } else if (reg_gpr(dst)) { - int size; - /* number of consecutive registers to assign: */ - size = ir3_neighbor_count(instr); - if (dst->wrmask != 0x1) - size = MAX2(size, ffs(~dst->wrmask) - 1); - name = alloc_block(ctx, instr, size); - } else if (dst->flags & IR3_REG_ADDR) { - debug_assert(!instr->cp.right); - dst->flags &= ~IR3_REG_ADDR; - name = regid(REG_A0, 0) | REG_HALF; - } else { - debug_assert(!instr->cp.right); - /* predicate register (p0).. etc */ - name = regid(REG_P0, 0); - debug_assert(dst->num == name); - } - - ra_assert(ctx, name >= 0); - - for (n = instr; n && !ctx->error; n = n->cp.right) { - instr_assign(ctx, n, name); - name++; - } -} - -static void instr_assign_array(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr) -{ - struct ir3_instruction *src; - int name, aid = instr->fi.aid; - - if (ctx->arrays[aid].base == ~0) { - int size = instr->regs_count - 1; - ctx->arrays[aid].base = alloc_block(ctx, instr, size); - ctx->arrays[aid].size = size; - } - - name = ctx->arrays[aid].base; - - foreach_ssa_src_n(src, i, instr) { - unsigned r = i + 1; - - /* skip address / etc (non real sources): */ - if (r >= instr->regs_count) - break; - - instr_assign(ctx, src, name); - name++; - } - -} - -static bool -block_ra(struct ir3_block *block, void *state) -{ - struct ir3_ra_ctx *ctx = state; - - ra_dump_list("-------\n", block->shader); - - /* first pass, assign arrays: */ - list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) { - if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) { - debug_assert(!n->cp.left); /* don't think this should happen */ - ra_dump_instr("ASSIGN ARRAY: ", n); - instr_assign_array(ctx, n); - ra_dump_list("-------\n", block->shader); - } - - if (ctx->error) - return false; - } - - list_for_each_entry (struct 
ir3_instruction, n, &block->instr_list, node) { - ra_dump_instr("ASSIGN: ", n); - instr_alloc_and_assign(ctx, ir3_neighbor_first(n)); - ra_dump_list("-------\n", block->shader); - - if (ctx->error) - return false; - } - - return true; -} - static int -shader_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) +ra_alloc(struct ir3_ra_ctx *ctx) { /* frag shader inputs get pre-assigned, since we have some * constraints/unknowns about setup for some of these regs: */ if (ctx->type == SHADER_FRAGMENT) { + struct ir3_block *block = ctx->ir->block; unsigned i = 0, j; if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) { + struct ir3_instruction *instr = block->inputs[i]; + unsigned cls = size_to_class(1, true); + unsigned name = ctx->class_base[cls] + instr->name; + unsigned reg = ctx->set->gpr_to_ra_reg[cls][0]; + /* if we have frag_face, it gets hr0.x */ - instr_assign(ctx, block->inputs[i], REG_HALF | 0); + ra_set_node_reg(ctx->g, name, reg); i += 4; } - for (j = 0; i < block->ninputs; i++, j++) - if (block->inputs[i]) - instr_assign(ctx, block->inputs[i], j); + + for (j = 0; i < block->ninputs; i++) { + struct ir3_instruction *instr = block->inputs[i]; + if (instr) { + struct ir3_instruction *defn; + int cls, sz, off; + + defn = get_definer(instr, &sz, &off); + if (defn == instr) { + unsigned name, reg; + + cls = size_to_class(sz, is_half(defn)); + debug_assert(cls >= 0); + name = ctx->class_base[cls] + defn->name; + reg = ctx->set->gpr_to_ra_reg[cls][j]; + + ra_set_node_reg(ctx->g, name, reg); + j += sz; + } + } + } } - block_ra(block, ctx); + if (!ra_allocate(ctx->g)) + return -1; - return ctx->error ? 
-1 : 0; -} + ra_block_alloc(ctx, ctx->ir->block); -static bool -block_mark_dst(struct ir3_block *block, void *state) -{ - list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) - if (n->regs_count > 0) - n->regs[0]->flags |= IR3_REG_SSA; - return true; + return 0; } int ir3_block_ra(struct ir3_block *block, enum shader_t type, bool frag_coord, bool frag_face) { struct ir3_ra_ctx ctx = { - .block = block, + .ir = block->shader, .type = type, - .frag_coord = frag_coord, .frag_face = frag_face, + .set = block->shader->compiler->set, }; int ret; - memset(&ctx.arrays, ~0, sizeof(ctx.arrays)); - - /* mark dst registers w/ SSA flag so we can see which - * have been assigned so far: - * NOTE: we really should set SSA flag consistently on - * every dst register in the frontend. - */ - block_mark_dst(block, &ctx); - - ir3_clear_mark(block->shader); - ret = shader_ra(&ctx, block); + ra_init(&ctx); + ra_add_interference(&ctx); + ret = ra_alloc(&ctx); + ra_destroy(&ctx); return ret; } From c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 25 May 2015 10:30:54 -0400 Subject: [PATCH 763/834] freedreno/ir3: move inputs/outputs to shader These belong in the shader, rather than the block. Mostly a lot of churn and nothing too interesting. But splitting this out from the rest of ir3_block reshuffling to cut down the noise in the later patch. 
Signed-off-by: Rob Clark --- .../drivers/freedreno/freedreno_util.h | 4 +- src/gallium/drivers/freedreno/ir3/ir3.c | 46 ++----- src/gallium/drivers/freedreno/ir3/ir3.h | 31 ++--- .../drivers/freedreno/ir3/ir3_cmdline.c | 16 +-- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 127 +++++++++--------- src/gallium/drivers/freedreno/ir3/ir3_cp.c | 13 +- src/gallium/drivers/freedreno/ir3/ir3_depth.c | 19 +-- src/gallium/drivers/freedreno/ir3/ir3_group.c | 36 ++--- .../drivers/freedreno/ir3/ir3_legalize.c | 10 +- src/gallium/drivers/freedreno/ir3/ir3_print.c | 6 +- src/gallium/drivers/freedreno/ir3/ir3_ra.c | 22 +-- src/gallium/drivers/freedreno/ir3/ir3_sched.c | 6 +- 12 files changed, 160 insertions(+), 176 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 630c6013ab9..deb0e602ce2 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -62,8 +62,8 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define FD_DBG_NOBYPASS 0x0040 #define FD_DBG_FRAGHALF 0x0080 #define FD_DBG_NOBIN 0x0100 -#define FD_DBG_OPTMSGS 0x0400 -#define FD_DBG_GLSL120 0x1000 +#define FD_DBG_OPTMSGS 0x0200 +#define FD_DBG_GLSL120 0x0400 extern int fd_mesa_debug; extern bool fd_binning_enabled; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 92c92e5001f..7515b79b0c9 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -66,12 +66,20 @@ void * ir3_alloc(struct ir3 *shader, int sz) return ptr; } -struct ir3 * ir3_create(struct ir3_compiler *compiler) +struct ir3 * ir3_create(struct ir3_compiler *compiler, + unsigned nin, unsigned nout) { - struct ir3 *shader = - calloc(1, sizeof(struct ir3)); + struct ir3 *shader = calloc(1, sizeof(struct ir3)); + grow_heap(shader); + shader->compiler = compiler; + shader->ninputs = nin; + shader->inputs = ir3_alloc(shader, 
sizeof(shader->inputs[0]) * nin); + + shader->noutputs = nout; + shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout); + return shader; } @@ -601,39 +609,11 @@ static void insert_instr(struct ir3_block *block, array_insert(shader->baryfs, instr); } -struct ir3_block * ir3_block_create(struct ir3 *shader, - unsigned ntmp, unsigned nin, unsigned nout) +struct ir3_block * ir3_block_create(struct ir3 *shader) { - struct ir3_block *block; - unsigned size; - char *ptr; - - size = sizeof(*block); - size += sizeof(block->temporaries[0]) * ntmp; - size += sizeof(block->inputs[0]) * nin; - size += sizeof(block->outputs[0]) * nout; - - ptr = ir3_alloc(shader, size); - - block = (void *)ptr; - ptr += sizeof(*block); - - block->temporaries = (void *)ptr; - block->ntemporaries = ntmp; - ptr += sizeof(block->temporaries[0]) * ntmp; - - block->inputs = (void *)ptr; - block->ninputs = nin; - ptr += sizeof(block->inputs[0]) * nin; - - block->outputs = (void *)ptr; - block->noutputs = nout; - ptr += sizeof(block->outputs[0]) * nout; - + struct ir3_block *block = ir3_alloc(shader, sizeof(*block)); block->shader = shader; - list_inithead(&block->instr_list); - return block; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 93a6ab5da7c..38912aa3bd4 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -333,6 +333,10 @@ struct ir3_heap_chunk; struct ir3 { struct ir3_compiler *compiler; + unsigned ninputs, noutputs; + struct ir3_instruction **inputs; + struct ir3_instruction **outputs; + /* Track bary.f (and ldlv) instructions.. this is needed in * scheduling to ensure that all varying fetches happen before * any potential kill instructions. 
The hw gets grumpy if all @@ -365,24 +369,19 @@ struct ir3 { struct ir3_block { struct ir3 *shader; - unsigned ntemporaries, ninputs, noutputs; - /* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */ - struct ir3_instruction **temporaries; - struct ir3_instruction **inputs; - struct ir3_instruction **outputs; /* only a single address register: */ struct ir3_instruction *address; struct list_head instr_list; }; -struct ir3 * ir3_create(struct ir3_compiler *compiler); +struct ir3 * ir3_create(struct ir3_compiler *compiler, + unsigned nin, unsigned nout); void ir3_destroy(struct ir3 *shader); void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, uint32_t gpu_id); void * ir3_alloc(struct ir3 *shader, int sz); -struct ir3_block * ir3_block_create(struct ir3 *shader, - unsigned ntmp, unsigned nin, unsigned nout); +struct ir3_block * ir3_block_create(struct ir3 *shader); struct ir3_instruction * ir3_instr_create(struct ir3_block *block, int category, opc_t opc); @@ -780,32 +779,28 @@ static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr void ir3_print(struct ir3 *ir); void ir3_print_instr(struct ir3_instruction *instr); -/* flatten if/else: */ -int ir3_block_flatten(struct ir3_block *block); - /* depth calculation: */ int ir3_delayslots(struct ir3_instruction *assigner, struct ir3_instruction *consumer, unsigned n); void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list); -void ir3_block_depth(struct ir3_block *block); +void ir3_depth(struct ir3 *ir); /* copy-propagate: */ -void ir3_block_cp(struct ir3_block *block); +void ir3_cp(struct ir3 *ir); /* group neighbors and insert mov's to resolve conflicts: */ -void ir3_block_group(struct ir3_block *block); +void ir3_group(struct ir3 *ir); /* scheduling: */ -int ir3_block_sched(struct ir3_block *block); +int ir3_sched(struct ir3 *ir); /* register assignment: */ struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(void *memctx); -int ir3_block_ra(struct 
ir3_block *block, enum shader_t type, +int ir3_ra(struct ir3 *ir3, enum shader_t type, bool frag_coord, bool frag_face); /* legalize: */ -void ir3_block_legalize(struct ir3_block *block, - bool *has_samp, int *max_bary); +void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary); /* ************************************************************************* */ /* instruction helpers */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 3fa886131f0..ad9d2719d59 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -66,34 +66,34 @@ static void dump_info(struct ir3_shader_variant *so, const char *str) // TODO make gpu_id configurable on cmdline bin = ir3_shader_assemble(so, 320); if (fd_mesa_debug & FD_DBG_DISASM) { - struct ir3_block *block = so->ir->block; + struct ir3 *ir = so->ir; struct ir3_register *reg; uint8_t regid; unsigned i; debug_printf("; %s: %s\n", type, str); - for (i = 0; i < block->ninputs; i++) { - if (!block->inputs[i]) { + for (i = 0; i < ir->ninputs; i++) { + if (!ir->inputs[i]) { debug_printf("; in%d unused\n", i); continue; } - reg = block->inputs[i]->regs[0]; + reg = ir->inputs[i]->regs[0]; regid = reg->num; debug_printf("@in(%sr%d.%c)\tin%d\n", (reg->flags & IR3_REG_HALF) ? "h" : "", (regid >> 2), "xyzw"[regid & 0x3], i); } - for (i = 0; i < block->noutputs; i++) { - if (!block->outputs[i]) { + for (i = 0; i < ir->noutputs; i++) { + if (!ir->outputs[i]) { debug_printf("; out%d unused\n", i); continue; } /* kill shows up as a virtual output.. skip it! */ - if (is_kill(block->outputs[i])) + if (is_kill(ir->outputs[i])) continue; - reg = block->outputs[i]->regs[0]; + reg = ir->outputs[i]->regs[0]; regid = reg->num; debug_printf("@out(%sr%d.%c)\tout%d\n", (reg->flags & IR3_REG_HALF) ? 
"h" : "", diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 39f4527c22b..f62a5ec2b26 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -51,6 +51,8 @@ static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val); struct ir3_compile { + struct ir3_compiler *compiler; + const struct tgsi_token *tokens; struct nir_shader *s; @@ -170,7 +172,8 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens) /* TODO nir doesn't lower everything for us yet, but ideally it would: */ static const struct tgsi_token * -lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so) +lower_tgsi(struct ir3_compile *ctx, const struct tgsi_token *tokens, + struct ir3_shader_variant *so) { struct tgsi_shader_info info; struct tgsi_lowering_config lconfig = { @@ -192,7 +195,7 @@ lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so) break; } - if (so->ir->compiler->gpu_id >= 400) { + if (ctx->compiler->gpu_id >= 400) { /* a4xx seems to have *no* sam.p */ lconfig.lower_TXP = ~0; /* lower all txp */ } else { @@ -204,13 +207,14 @@ lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so) } static struct ir3_compile * -compile_init(struct ir3_shader_variant *so, +compile_init(struct ir3_compiler *compiler, + struct ir3_shader_variant *so, const struct tgsi_token *tokens) { struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile); const struct tgsi_token *lowered_tokens; - if (so->ir->compiler->gpu_id >= 400) { + if (compiler->gpu_id >= 400) { /* need special handling for "flat" */ ctx->flat_bypass = true; ctx->levels_add_one = false; @@ -230,6 +234,7 @@ compile_init(struct ir3_shader_variant *so, break; } + ctx->compiler = compiler; ctx->ir = so->ir; ctx->so = so; ctx->next_inloc = 8; @@ -240,7 +245,7 @@ compile_init(struct ir3_shader_variant *so, 
ctx->addr_ht = _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); - lowered_tokens = lower_tgsi(tokens, so); + lowered_tokens = lower_tgsi(ctx, tokens, so); if (!lowered_tokens) lowered_tokens = tokens; ctx->s = to_nir(lowered_tokens); @@ -454,7 +459,7 @@ create_collect(struct ir3_block *block, struct ir3_instruction **arr, return NULL; collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz); - ir3_reg_create(collect, 0, 0); + ir3_reg_create(collect, 0, 0); /* dst */ for (unsigned i = 0; i < arrsz; i++) ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = arr[i]; @@ -1134,8 +1139,8 @@ static void add_sysval_input(struct ir3_compile *ctx, unsigned name, so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT; so->total_in++; - ctx->block->ninputs = MAX2(ctx->block->ninputs, r + 1); - ctx->block->inputs[r] = instr; + ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1); + ctx->ir->inputs[r] = instr; } static void @@ -1174,17 +1179,18 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_load_input: for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; - dst[i] = b->inputs[n]; + dst[i] = ctx->ir->inputs[n]; } break; case nir_intrinsic_load_input_indirect: src = get_src(ctx, &intr->src[0]); struct ir3_instruction *collect = - create_collect(b, b->inputs, b->ninputs); + create_collect(b, ctx->ir->inputs, ctx->ir->ninputs); struct ir3_instruction *addr = get_addr(ctx, src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; - dst[i] = create_indirect_load(ctx, b->ninputs, n, addr, collect); + dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, + n, addr, collect); } break; case nir_intrinsic_load_var: @@ -1197,7 +1203,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) src = get_src(ctx, &intr->src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; - b->outputs[n] = src[i]; + ctx->ir->outputs[n] = src[i]; } break; 
case nir_intrinsic_load_base_vertex: @@ -1707,7 +1713,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) instr = create_input(ctx->block, NULL, idx); } - ctx->block->inputs[idx] = instr; + ctx->ir->inputs[idx] = instr; } if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) { @@ -1774,7 +1780,7 @@ setup_output(struct ir3_compile *ctx, nir_variable *out) for (int i = 0; i < ncomp; i++) { unsigned idx = (n * 4) + i; - ctx->block->outputs[idx] = create_immed(ctx->block, fui(0.0)); + ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0)); } } @@ -1794,12 +1800,14 @@ emit_instructions(struct ir3_compile *ctx) ninputs += 8; } - ctx->block = ir3_block_create(ctx->ir, 0, ninputs, noutputs); + ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs); + ctx->block = ir3_block_create(ctx->ir); + ctx->ir->block = ctx->block; if (ctx->so->type == SHADER_FRAGMENT) { - ctx->block->noutputs -= ARRAY_SIZE(ctx->kill); + ctx->ir->noutputs -= ARRAY_SIZE(ctx->kill); } else if (ctx->so->type == SHADER_VERTEX) { - ctx->block->ninputs -= 8; + ctx->ir->ninputs -= 8; } /* for fragment shader, we have a single input register (usually @@ -1849,12 +1857,12 @@ static void fixup_frag_inputs(struct ir3_compile *ctx) { struct ir3_shader_variant *so = ctx->so; - struct ir3_block *block = ctx->block; + struct ir3 *ir = ctx->ir; struct ir3_instruction **inputs; struct ir3_instruction *instr; int n, regid = 0; - block->ninputs = 0; + ir->ninputs = 0; n = 4; /* always have frag_pos */ n += COND(so->frag_face, 4); @@ -1866,15 +1874,15 @@ fixup_frag_inputs(struct ir3_compile *ctx) /* this ultimately gets assigned to hr0.x so doesn't conflict * with frag_coord/frag_pos.. 
*/ - inputs[block->ninputs++] = ctx->frag_face; + inputs[ir->ninputs++] = ctx->frag_face; ctx->frag_face->regs[0]->num = 0; /* remaining channels not used, but let's avoid confusing * other parts that expect inputs to come in groups of vec4 */ - inputs[block->ninputs++] = NULL; - inputs[block->ninputs++] = NULL; - inputs[block->ninputs++] = NULL; + inputs[ir->ninputs++] = NULL; + inputs[ir->ninputs++] = NULL; + inputs[ir->ninputs++] = NULL; } /* since we don't know where to set the regid for frag_coord, @@ -1888,28 +1896,28 @@ fixup_frag_inputs(struct ir3_compile *ctx) ctx->frag_coord[2]->regs[0]->num = regid++; ctx->frag_coord[3]->regs[0]->num = regid++; - inputs[block->ninputs++] = ctx->frag_coord[0]; - inputs[block->ninputs++] = ctx->frag_coord[1]; - inputs[block->ninputs++] = ctx->frag_coord[2]; - inputs[block->ninputs++] = ctx->frag_coord[3]; + inputs[ir->ninputs++] = ctx->frag_coord[0]; + inputs[ir->ninputs++] = ctx->frag_coord[1]; + inputs[ir->ninputs++] = ctx->frag_coord[2]; + inputs[ir->ninputs++] = ctx->frag_coord[3]; } /* we always have frag_pos: */ so->pos_regid = regid; /* r0.x */ - instr = create_input(block, NULL, block->ninputs); + instr = create_input(ctx->block, NULL, ir->ninputs); instr->regs[0]->num = regid++; - inputs[block->ninputs++] = instr; + inputs[ir->ninputs++] = instr; ctx->frag_pos->regs[1]->instr = instr; /* r0.y */ - instr = create_input(block, NULL, block->ninputs); + instr = create_input(ctx->block, NULL, ir->ninputs); instr->regs[0]->num = regid++; - inputs[block->ninputs++] = instr; + inputs[ir->ninputs++] = instr; ctx->frag_pos->regs[2]->instr = instr; - block->inputs = inputs; + ir->inputs = inputs; } int @@ -1919,18 +1927,14 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, struct ir3_shader_key key) { struct ir3_compile *ctx; - struct ir3_block *block; + struct ir3 *ir; struct ir3_instruction **inputs; unsigned i, j, actual_in; int ret = 0, max_bary; assert(!so->ir); - so->ir = ir3_create(compiler); - - assert(so->ir); 
- - ctx = compile_init(so, tokens); + ctx = compile_init(compiler, so, tokens); if (!ctx) { DBG("INIT failed!"); ret = -1; @@ -1945,11 +1949,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, goto out; } - block = ctx->block; - so->ir->block = block; + ir = so->ir = ctx->ir; /* keep track of the inputs from TGSI perspective.. */ - inputs = block->inputs; + inputs = ir->inputs; /* but fixup actual inputs for frag shader: */ if (so->type == SHADER_FRAGMENT) @@ -1966,24 +1969,24 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, (name == TGSI_SEMANTIC_PSIZE))) { if (i != j) { so->outputs[j] = so->outputs[i]; - block->outputs[(j*4)+0] = block->outputs[(i*4)+0]; - block->outputs[(j*4)+1] = block->outputs[(i*4)+1]; - block->outputs[(j*4)+2] = block->outputs[(i*4)+2]; - block->outputs[(j*4)+3] = block->outputs[(i*4)+3]; + ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0]; + ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1]; + ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2]; + ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3]; } j++; } } so->outputs_count = j; - block->noutputs = j * 4; + ir->noutputs = j * 4; } /* if we want half-precision outputs, mark the output registers * as half: */ if (key.half_precision) { - for (i = 0; i < block->noutputs; i++) { - struct ir3_instruction *out = block->outputs[i]; + for (i = 0; i < ir->noutputs; i++) { + struct ir3_instruction *out = ir->outputs[i]; if (!out) continue; out->regs[0]->flags |= IR3_REG_HALF; @@ -2004,36 +2007,34 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, */ if (so->type == SHADER_FRAGMENT) { for (i = 0; i < ctx->kill_count; i++) - block->outputs[block->noutputs++] = ctx->kill[i]; + ir->outputs[ir->noutputs++] = ctx->kill[i]; } if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("BEFORE CP:\n"); - ir3_print(so->ir); + ir3_print(ir); } - ir3_block_depth(block); - - ir3_block_cp(block); + ir3_cp(ir); if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("BEFORE GROUPING:\n"); - ir3_print(so->ir); + ir3_print(ir); } /* Group 
left/right neighbors, inserting mov's where needed to * solve conflicts: */ - ir3_block_group(block); + ir3_group(ir); - ir3_block_depth(block); + ir3_depth(ir); if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER DEPTH:\n"); - ir3_print(so->ir); + ir3_print(ir); } - ret = ir3_block_sched(block); + ret = ir3_sched(ir); if (ret) { DBG("SCHED failed!"); goto out; @@ -2041,10 +2042,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER SCHED:\n"); - ir3_print(so->ir); + ir3_print(ir); } - ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face); + ret = ir3_ra(ir, so->type, so->frag_coord, so->frag_face); if (ret) { DBG("RA failed!"); goto out; @@ -2052,14 +2053,14 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER RA:\n"); - ir3_print(so->ir); + ir3_print(ir); } - ir3_block_legalize(block, &so->has_samp, &max_bary); + ir3_legalize(ir, &so->has_samp, &max_bary); /* fixup input/outputs: */ for (i = 0; i < so->outputs_count; i++) { - so->outputs[i].regid = block->outputs[i*4]->regs[0]->num; + so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num; /* preserve hack for depth output.. 
tgsi writes depth to .z, * but what we give the hw is the scalar register: */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 8c057166f32..a477bd4b237 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -387,16 +387,17 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags) return instr; } -void ir3_block_cp(struct ir3_block *block) +void +ir3_cp(struct ir3 *ir) { - ir3_clear_mark(block->shader); + ir3_clear_mark(ir->block->shader); - for (unsigned i = 0; i < block->noutputs; i++) { - if (block->outputs[i]) { + for (unsigned i = 0; i < ir->noutputs; i++) { + if (ir->outputs[i]) { struct ir3_instruction *out = - instr_cp(block->outputs[i], NULL); + instr_cp(ir->outputs[i], NULL); - block->outputs[i] = out; + ir->outputs[i] = out; } } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 601e14a1c85..6fc8b1762ff 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -144,22 +144,23 @@ remove_unused_by_block(struct ir3_block *block) } } -void ir3_block_depth(struct ir3_block *block) +void +ir3_depth(struct ir3 *ir) { unsigned i; - ir3_clear_mark(block->shader); - for (i = 0; i < block->noutputs; i++) - if (block->outputs[i]) - ir3_instr_depth(block->outputs[i]); + ir3_clear_mark(ir->block->shader); + for (i = 0; i < ir->noutputs; i++) + if (ir->outputs[i]) + ir3_instr_depth(ir->outputs[i]); /* mark un-used instructions: */ - remove_unused_by_block(block); + remove_unused_by_block(ir->block); /* cleanup unused inputs: */ - for (i = 0; i < block->ninputs; i++) { - struct ir3_instruction *in = block->inputs[i]; + for (i = 0; i < ir->ninputs; i++) { + struct ir3_instruction *in = ir->inputs[i]; if (in && (in->depth == DEPTH_UNUSED)) - block->inputs[i] = NULL; + ir->inputs[i] = NULL; } } diff --git 
a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c index d744477aad1..85d0948fa97 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_group.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c @@ -99,7 +99,8 @@ static struct ir3_instruction *instr_get(void *arr, int idx) { return ssa(((struct ir3_instruction *)arr)->regs[idx+1]); } -static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) +static void +instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) { ((struct ir3_instruction *)arr)->regs[idx+1]->instr = ir3_MOV(instr->block, instr, TYPE_F32); @@ -107,7 +108,8 @@ static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) static struct group_ops instr_ops = { instr_get, instr_insert_mov }; -static void group_n(struct group_ops *ops, void *arr, unsigned n) +static void +group_n(struct group_ops *ops, void *arr, unsigned n) { unsigned i, j; @@ -170,7 +172,8 @@ restart: } } -static void instr_find_neighbors(struct ir3_instruction *instr) +static void +instr_find_neighbors(struct ir3_instruction *instr) { struct ir3_instruction *src; @@ -189,7 +192,8 @@ static void instr_find_neighbors(struct ir3_instruction *instr) * we need to insert dummy/padding instruction for grouping, and * then take it back out again before anyone notices. 
*/ -static void pad_and_group_input(struct ir3_instruction **input, unsigned n) +static void +pad_and_group_input(struct ir3_instruction **input, unsigned n) { int i, mask = 0; struct ir3_block *block = NULL; @@ -214,7 +218,8 @@ static void pad_and_group_input(struct ir3_instruction **input, unsigned n) } } -static void block_find_neighbors(struct ir3_block *block) +static void +find_neighbors(struct ir3 *ir) { unsigned i; @@ -232,22 +237,23 @@ static void block_find_neighbors(struct ir3_block *block) * This logic won't quite cut it if we don't align smaller * on vec4 boundaries */ - for (i = 0; i < block->ninputs; i += 4) - pad_and_group_input(&block->inputs[i], 4); - for (i = 0; i < block->noutputs; i += 4) - group_n(&arr_ops_out, &block->outputs[i], 4); + for (i = 0; i < ir->ninputs; i += 4) + pad_and_group_input(&ir->inputs[i], 4); + for (i = 0; i < ir->noutputs; i += 4) + group_n(&arr_ops_out, &ir->outputs[i], 4); - for (i = 0; i < block->noutputs; i++) { - if (block->outputs[i]) { - struct ir3_instruction *instr = block->outputs[i]; + for (i = 0; i < ir->noutputs; i++) { + if (ir->outputs[i]) { + struct ir3_instruction *instr = ir->outputs[i]; instr_find_neighbors(instr); } } } -void ir3_block_group(struct ir3_block *block) +void +ir3_group(struct ir3 *ir) { - ir3_clear_mark(block->shader); - block_find_neighbors(block); + ir3_clear_mark(ir->block->shader); + find_neighbors(ir); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index be0b5ce442c..34055f4c612 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -26,7 +26,6 @@ * Rob Clark */ -#include "pipe/p_shader_tokens.h" #include "util/u_math.h" #include "freedreno_util.h" @@ -48,7 +47,8 @@ struct ir3_legalize_ctx { int max_bary; }; -static void legalize(struct ir3_legalize_ctx *ctx) +static void +legalize(struct ir3_legalize_ctx *ctx) { struct ir3_block *block = ctx->block; 
struct ir3_instruction *last_input = NULL; @@ -220,11 +220,11 @@ static void legalize(struct ir3_legalize_ctx *ctx) ->flags |= IR3_INSTR_SS | IR3_INSTR_SY; } -void ir3_block_legalize(struct ir3_block *block, - bool *has_samp, int *max_bary) +void +ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary) { struct ir3_legalize_ctx ctx = { - .block = block, + .block = ir->block, .max_bary = -1, }; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c index 986a5bc7be0..965c834b8aa 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_print.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c @@ -198,10 +198,10 @@ ir3_print(struct ir3 *ir) print_block(block, 0); - for (unsigned i = 0; i < block->noutputs; i++) { - if (!block->outputs[i]) + for (unsigned i = 0; i < ir->noutputs; i++) { + if (!ir->outputs[i]) continue; printf("out%d: ", i); - print_instr(block->outputs[i], 0); + print_instr(ir->outputs[i], 0); } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 39ce9c5d4ce..394c63f646d 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -527,13 +527,13 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) static void ra_add_interference(struct ir3_ra_ctx *ctx) { - struct ir3_block *block = ctx->ir->block; + struct ir3 *ir = ctx->ir; ra_block_compute_live_ranges(ctx, ctx->ir->block); /* need to fix things up to keep outputs live: */ - for (unsigned i = 0; i < block->noutputs; i++) { - struct ir3_instruction *instr = block->outputs[i]; + for (unsigned i = 0; i < ir->noutputs; i++) { + struct ir3_instruction *instr = ir->outputs[i]; struct ir3_instruction *defn; int cls, sz, off; @@ -682,10 +682,10 @@ ra_alloc(struct ir3_ra_ctx *ctx) * constraints/unknowns about setup for some of these regs: */ if (ctx->type == SHADER_FRAGMENT) { - struct ir3_block *block = ctx->ir->block; + struct 
ir3 *ir = ctx->ir; unsigned i = 0, j; - if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) { - struct ir3_instruction *instr = block->inputs[i]; + if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) { + struct ir3_instruction *instr = ir->inputs[i]; unsigned cls = size_to_class(1, true); unsigned name = ctx->class_base[cls] + instr->name; unsigned reg = ctx->set->gpr_to_ra_reg[cls][0]; @@ -695,8 +695,8 @@ ra_alloc(struct ir3_ra_ctx *ctx) i += 4; } - for (j = 0; i < block->ninputs; i++) { - struct ir3_instruction *instr = block->inputs[i]; + for (j = 0; i < ir->ninputs; i++) { + struct ir3_instruction *instr = ir->inputs[i]; if (instr) { struct ir3_instruction *defn; int cls, sz, off; @@ -725,14 +725,14 @@ ra_alloc(struct ir3_ra_ctx *ctx) return 0; } -int ir3_block_ra(struct ir3_block *block, enum shader_t type, +int ir3_ra(struct ir3 *ir, enum shader_t type, bool frag_coord, bool frag_face) { struct ir3_ra_ctx ctx = { - .ir = block->shader, + .ir = ir, .type = type, .frag_face = frag_face, - .set = block->shader->compiler->set, + .set = ir->compiler->set, }; int ret; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index 1d166d879df..0d404a83583 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -424,11 +424,11 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) } } -int ir3_block_sched(struct ir3_block *block) +int ir3_sched(struct ir3 *ir) { struct ir3_sched_ctx ctx = {0}; - ir3_clear_mark(block->shader); - sched_block(&ctx, block); + ir3_clear_mark(ir->block->shader); + sched_block(&ctx, ir->block); if (ctx.error) return -1; return 0; From d646d3ae9d221104db0e9daec33ef470b1bdd957 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 25 May 2015 10:59:21 -0400 Subject: [PATCH 764/834] freedreno/ir3: simplify find_neighbors stop condition Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_group.c | 
18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c index 85d0948fa97..1fe09cc11e5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_group.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c @@ -34,22 +34,6 @@ * Find/group instruction neighbors: */ -/* stop condition for iteration: */ -static bool check_stop(struct ir3_instruction *instr) -{ - if (ir3_instr_check_mark(instr)) - return true; - - /* stay within the block.. don't try to operate across - * basic block boundaries or we'll have problems when - * dealing with multiple basic blocks: - */ - if (is_meta(instr) && (instr->opc == OPC_META_INPUT)) - return true; - - return false; -} - /* bleh.. we need to do the same group_n() thing for both inputs/outputs * (where we have a simple instr[] array), and fanin nodes (where we have * an extra indirection via reg->instr). @@ -177,7 +161,7 @@ instr_find_neighbors(struct ir3_instruction *instr) { struct ir3_instruction *src; - if (check_stop(instr)) + if (ir3_instr_check_mark(instr)) return; if (is_meta(instr) && (instr->opc == OPC_META_FI)) From 660d5c1646f5d63f9626b24beabc9cfc318849d4 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 1 Jun 2015 12:35:19 -0400 Subject: [PATCH 765/834] freedreno/ir3: a4xx encodes larger immed offset Without this, negative branch/jump offsets look like very large positive offsets. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/disasm-a3xx.c | 8 ++++---- src/gallium/drivers/freedreno/ir3/instr-a3xx.h | 12 ++++++++++-- src/gallium/drivers/freedreno/ir3/ir3.c | 7 ++++++- src/gallium/drivers/freedreno/ir3/ir3.h | 1 + 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c index a5136c6bd3d..48ae7c71b9f 100644 --- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c +++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c @@ -133,16 +133,16 @@ static void print_instr_cat0(instr_t *instr) break; case OPC_BR: printf(" %sp0.%c, #%d", cat0->inv ? "!" : "", - component[cat0->comp], cat0->immed); + component[cat0->comp], cat0->a3xx.immed); break; case OPC_JUMP: case OPC_CALL: - printf(" #%d", cat0->immed); + printf(" #%d", cat0->a3xx.immed); break; } - if ((debug & PRINT_VERBOSE) && (cat0->dummy1|cat0->dummy2|cat0->dummy3|cat0->dummy4)) - printf("\t{0: %x,%x,%x,%x}", cat0->dummy1, cat0->dummy2, cat0->dummy3, cat0->dummy4); + if ((debug & PRINT_VERBOSE) && (cat0->a3xx.dummy1|cat0->dummy2|cat0->dummy3|cat0->dummy4)) + printf("\t{0: %x,%x,%x,%x}", cat0->a3xx.dummy1, cat0->dummy2, cat0->dummy3, cat0->dummy4); } static void print_instr_cat1(instr_t *instr) diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index 5ead0c86999..efb07ea479e 100644 --- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -278,8 +278,16 @@ static inline int reg_special(reg_t reg) typedef struct PACKED { /* dword0: */ - int16_t immed : 16; - uint32_t dummy1 : 16; + union PACKED { + struct PACKED { + int16_t immed : 16; + uint32_t dummy1 : 16; + } a3xx; + struct PACKED { + int32_t immed : 20; + uint32_t dummy1 : 12; + } a4xx; + }; /* dword1: */ uint32_t dummy2 : 8; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c 
index 7515b79b0c9..ba5851c6c82 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -152,7 +152,11 @@ static int emit_cat0(struct ir3_instruction *instr, void *ptr, { instr_cat0_t *cat0 = ptr; - cat0->immed = instr->cat0.immed; + if (info->gpu_id >= 400) { + cat0->a4xx.immed = instr->cat0.immed; + } else { + cat0->a3xx.immed = instr->cat0.immed; + } cat0->repeat = instr->repeat; cat0->ss = !!(instr->flags & IR3_INSTR_SS); cat0->inv = instr->cat0.inv; @@ -547,6 +551,7 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, struct ir3_block *block = shader->block; uint32_t *ptr, *dwords; + info->gpu_id = gpu_id; info->max_reg = -1; info->max_half_reg = -1; info->max_const = -1; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 38912aa3bd4..95b866988b8 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -41,6 +41,7 @@ struct ir3_instruction; struct ir3_block; struct ir3_info { + uint32_t gpu_id; uint16_t sizedwords; uint16_t instrs_count; /* expanded to account for rpt's */ /* NOTE: max_reg, etc, does not include registers not touched From 457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 9 Jun 2015 17:17:06 -0400 Subject: [PATCH 766/834] freedreno/ir3: block reshuffling and loops! This shuffles things around to allow the shader to have multiple basic blocks. We drop the entire CFG structure from nir and just preserve the blocks. At scheduling we know whether to schedule conditional branches or unconditional jumps at the end of the block based on the # of block successors. (Dropping jumps to the following instruction, etc.) One slight complication is that variables (load_var/store_var, ie. arrays) are not in SSA form, so we have to figure out where to put the phi's ourself. For this, we use the predecessor set information from nir_block. 
(We could perhaps use NIR's dominance frontier information to help with this?) Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.c | 60 ++- src/gallium/drivers/freedreno/ir3/ir3.h | 92 +++- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 430 ++++++++++++++++-- src/gallium/drivers/freedreno/ir3/ir3_cp.c | 14 +- src/gallium/drivers/freedreno/ir3/ir3_depth.c | 14 +- src/gallium/drivers/freedreno/ir3/ir3_group.c | 7 +- .../drivers/freedreno/ir3/ir3_legalize.c | 184 +++++++- src/gallium/drivers/freedreno/ir3/ir3_print.c | 38 +- src/gallium/drivers/freedreno/ir3/ir3_ra.c | 226 +++++++-- src/gallium/drivers/freedreno/ir3/ir3_sched.c | 88 +++- 10 files changed, 1026 insertions(+), 127 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index ba5851c6c82..a166b67d7cf 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -80,6 +80,8 @@ struct ir3 * ir3_create(struct ir3_compiler *compiler, shader->noutputs = nout; shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout); + list_inithead(&shader->block_list); + return shader; } @@ -548,7 +550,6 @@ static int (*emit[])(struct ir3_instruction *instr, void *ptr, void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, uint32_t gpu_id) { - struct ir3_block *block = shader->block; uint32_t *ptr, *dwords; info->gpu_id = gpu_id; @@ -558,8 +559,10 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, info->instrs_count = 0; info->sizedwords = 0; - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - info->sizedwords += 2; + list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + info->sizedwords += 2; + } } /* need a integer number of instruction "groups" (sets of 16 @@ -574,12 +577,14 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, ptr 
= dwords = calloc(4, info->sizedwords); - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - int ret = emit[instr->category](instr, dwords, info); - if (ret) - goto fail; - info->instrs_count += 1 + instr->repeat; - dwords += 2; + list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + int ret = emit[instr->category](instr, dwords, info); + if (ret) + goto fail; + info->instrs_count += 1 + instr->repeat; + dwords += 2; + } } return ptr; @@ -617,7 +622,12 @@ static void insert_instr(struct ir3_block *block, struct ir3_block * ir3_block_create(struct ir3 *shader) { struct ir3_block *block = ir3_alloc(shader, sizeof(*block)); +#ifdef DEBUG + static uint32_t serialno = 0; + block->serialno = ++serialno; +#endif block->shader = shader; + list_inithead(&block->node); list_inithead(&block->instr_list); return block; } @@ -688,10 +698,40 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, int num, int flags) { - struct ir3_register *reg = reg_create(instr->block->shader, num, flags); + struct ir3 *shader = instr->block->shader; + struct ir3_register *reg = reg_create(shader, num, flags); #ifdef DEBUG debug_assert(instr->regs_count < instr->regs_max); #endif instr->regs[instr->regs_count++] = reg; return reg; } + +void +ir3_block_clear_mark(struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) + instr->flags &= ~IR3_INSTR_MARK; +} + +void +ir3_clear_mark(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + ir3_block_clear_mark(block); + } +} + +/* note: this will destroy instr->depth, don't do it until after sched! 
*/ +void +ir3_count_instructions(struct ir3 *ir) +{ + unsigned ip = 0; + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + instr->ip = ip++; + } + block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip; + block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip; + } +} diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 95b866988b8..9c35a763d58 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -83,7 +83,8 @@ struct ir3_register { * before register assignment is done: */ IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */ - IR3_REG_IA = 0x4000, /* meta-input dst is "assigned" */ + IR3_REG_PHI_SRC= 0x4000, /* phi src, regs[0]->instr points to phi */ + } flags; union { /* normal registers: @@ -187,6 +188,7 @@ struct ir3_instruction { char inv; char comp; int immed; + struct ir3_block *target; } cat0; struct { type_t src_type, dst_type; @@ -220,14 +222,14 @@ struct ir3_instruction { int aid; } fi; struct { - struct ir3_block *if_block, *else_block; - } flow; + /* used to temporarily hold reference to nir_phi_instr + * until we resolve the phi srcs + */ + void *nphi; + } phi; struct { struct ir3_block *block; } inout; - - /* XXX keep this as big as all other union members! 
*/ - uint32_t info[3]; }; /* transient values used during various algorithms: */ @@ -363,16 +365,40 @@ struct ir3 { unsigned predicates_count, predicates_sz; struct ir3_instruction **predicates; - struct ir3_block *block; + /* List of blocks: */ + struct list_head block_list; + unsigned heap_idx; struct ir3_heap_chunk *chunk; }; +typedef struct nir_block nir_block; + struct ir3_block { + struct list_head node; struct ir3 *shader; - /* only a single address register: */ - struct ir3_instruction *address; - struct list_head instr_list; + + nir_block *nblock; + + struct list_head instr_list; /* list of ir3_instruction */ + + /* each block has either one or two successors.. in case of + * two successors, 'condition' decides which one to follow. + * A block preceding an if/else has two successors. + */ + struct ir3_instruction *condition; + struct ir3_block *successors[2]; + + uint16_t start_ip, end_ip; + + /* used for per-pass extra block data. Mainly used right + * now in RA step to track livein/liveout. + */ + void *bd; + +#ifdef DEBUG + uint32_t serialno; +#endif }; struct ir3 * ir3_create(struct ir3_compiler *compiler, @@ -394,7 +420,6 @@ const char *ir3_instr_name(struct ir3_instruction *instr); struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, int num, int flags); - static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) { if (instr->flags & IR3_INSTR_MARK) @@ -403,19 +428,10 @@ static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) return false; } -static inline void ir3_clear_mark(struct ir3 *shader) -{ - /* TODO would be nice to drop the instruction array.. for - * new compiler, _clear_mark() is all we use it for, and - * we could probably manage a linked list instead.. - * - * Also, we'll probably want to mark instructions within - * a block, so tracking the list of instrs globally is - * unlikely to be what we want. 
- */ - list_for_each_entry (struct ir3_instruction, instr, &shader->block->instr_list, node) - instr->flags &= ~IR3_INSTR_MARK; -} +void ir3_block_clear_mark(struct ir3_block *block); +void ir3_clear_mark(struct ir3 *shader); + +void ir3_count_instructions(struct ir3 *ir); static inline int ir3_instr_regno(struct ir3_instruction *instr, struct ir3_register *reg) @@ -593,6 +609,22 @@ static inline bool reg_gpr(struct ir3_register *r) return true; } +static inline type_t half_type(type_t type) +{ + switch (type) { + case TYPE_F32: return TYPE_F16; + case TYPE_U32: return TYPE_U16; + case TYPE_S32: return TYPE_S16; + case TYPE_F16: + case TYPE_U16: + case TYPE_S16: + return type; + default: + assert(0); + return ~0; + } +} + /* some cat2 instructions (ie. those which are not float) can embed an * immediate: */ @@ -837,6 +869,15 @@ ir3_NOP(struct ir3_block *block) return ir3_instr_create(block, 0, OPC_NOP); } +#define INSTR0(CAT, name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create(block, CAT, OPC_##name); \ + return instr; \ +} + #define INSTR1(CAT, name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block, \ @@ -880,7 +921,10 @@ ir3_##name(struct ir3_block *block, \ } /* cat0 instructions: */ +INSTR0(0, BR); +INSTR0(0, JUMP); INSTR1(0, KILL); +INSTR0(0, END); /* cat2 instructions, most 2 src but some 1 src: */ INSTR2(2, ADD_F) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index f62a5ec2b26..4165e2d6aa7 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -48,8 +48,6 @@ #include "ir3.h" -static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val); - struct ir3_compile { struct ir3_compiler *compiler; @@ -62,7 +60,10 @@ struct ir3_compile { /* bitmask of which samplers are 
integer: */ uint16_t integer_s; - struct ir3_block *block; + struct ir3_block *block; /* the current block */ + struct ir3_block *in_block; /* block created for shader inputs */ + + nir_function_impl *impl; /* For fragment shaders, from the hw perspective the only * actual input is r0.xy position register passed to bary.f. @@ -94,6 +95,11 @@ struct ir3_compile { */ struct hash_table *addr_ht; + /* maps nir_block to ir3_block, mostly for the purposes of + * figuring out the blocks successors + */ + struct hash_table *block_ht; + /* for calculating input/output positions/linkages: */ unsigned next_inloc; @@ -120,6 +126,9 @@ struct ir3_compile { }; +static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val); +static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock); + static struct nir_shader *to_nir(const struct tgsi_token *tokens) { struct nir_shader_compiler_options options = { @@ -148,6 +157,7 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens) nir_lower_vars_to_ssa(s); nir_lower_alu_to_scalar(s); + nir_lower_phis_to_scalar(s); progress |= nir_copy_prop(s); progress |= nir_opt_dce(s); @@ -244,6 +254,8 @@ compile_init(struct ir3_compiler *compiler, _mesa_hash_pointer, _mesa_key_pointer_equal); ctx->addr_ht = _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); + ctx->block_ht = _mesa_hash_table_create(ctx, + _mesa_hash_pointer, _mesa_key_pointer_equal); lowered_tokens = lower_tgsi(ctx, tokens, so); if (!lowered_tokens) @@ -287,33 +299,206 @@ compile_free(struct ir3_compile *ctx) ralloc_free(ctx); } - +/* global per-array information: */ struct ir3_array { unsigned length, aid; +}; + +/* per-block array state: */ +struct ir3_array_value { + /* TODO drop length/aid, and just have ptr back to ir3_array */ + unsigned length, aid; + /* initial array element values are phi's, other than for the + * entry block. 
The phi src's get added later in a resolve step + * after we have visited all the blocks, to account for back + * edges in the cfg. + */ + struct ir3_instruction **phis; + /* current array element values (as block is processed). When + * the array phi's are resolved, it will contain the array state + * at exit of block, so successor blocks can use it to add their + * phi srcs. + */ struct ir3_instruction *arr[]; }; +/* track array assignments per basic block. When an array is read + * outside of the same basic block, we can use NIR's dominance-frontier + * information to figure out where phi nodes are needed. + */ +struct ir3_nir_block_data { + unsigned foo; + /* indexed by array-id (aid): */ + struct ir3_array_value *arrs[]; +}; + +static struct ir3_nir_block_data * +get_block_data(struct ir3_compile *ctx, struct ir3_block *block) +{ + if (!block->bd) { + struct ir3_nir_block_data *bd = ralloc_size(ctx, sizeof(*bd) + + ((ctx->num_arrays + 1) * sizeof(bd->arrs[0]))); + block->bd = bd; + } + return block->bd; +} + static void declare_var(struct ir3_compile *ctx, nir_variable *var) { unsigned length = glsl_get_length(var->type) * 4; /* always vec4, at least with ttn */ - struct ir3_array *arr = ralloc_size(ctx, sizeof(*arr) + - (length * sizeof(arr->arr[0]))); + struct ir3_array *arr = ralloc(ctx, struct ir3_array); arr->length = length; arr->aid = ++ctx->num_arrays; - /* Some shaders end up reading array elements without first writing.. 
- * so initialize things to prevent null instr ptrs later: - */ - for (unsigned i = 0; i < length; i++) - arr->arr[i] = create_immed(ctx->block, 0); _mesa_hash_table_insert(ctx->var_ht, var, arr); } -static struct ir3_array * +static nir_block * +nir_block_pred(nir_block *block) +{ + assert(block->predecessors->entries < 2); + if (block->predecessors->entries == 0) + return NULL; + return (nir_block *)_mesa_set_next_entry(block->predecessors, NULL)->key; +} + +static struct ir3_array_value * get_var(struct ir3_compile *ctx, nir_variable *var) { struct hash_entry *entry = _mesa_hash_table_search(ctx->var_ht, var); - return entry->data; + struct ir3_block *block = ctx->block; + struct ir3_nir_block_data *bd = get_block_data(ctx, block); + struct ir3_array *arr = entry->data; + + if (!bd->arrs[arr->aid]) { + struct ir3_array_value *av = ralloc_size(bd, sizeof(*av) + + (arr->length * sizeof(av->arr[0]))); + struct ir3_array_value *defn = NULL; + nir_block *pred_block; + + av->length = arr->length; + av->aid = arr->aid; + + /* For loops, we have to consider that we have not visited some + * of the blocks who should feed into the phi (ie. back-edges in + * the cfg).. for example: + * + * loop { + * block { load_var; ... } + * if then block {} else block {} + * block { store_var; ... } + * if then block {} else block {} + * block {...} + * } + * + * We can skip the phi if we can chase the block predecessors + * until finding the block previously defining the array without + * crossing a block that has more than one predecessor. + * + * Otherwise create phi's and resolve them as a post-pass after + * all the blocks have been visited (to handle back-edges). 
+ */ + + for (pred_block = block->nblock; + pred_block && (pred_block->predecessors->entries < 2) && !defn; + pred_block = nir_block_pred(pred_block)) { + struct ir3_block *pblock = get_block(ctx, pred_block); + struct ir3_nir_block_data *pbd = pblock->bd; + if (!pbd) + continue; + defn = pbd->arrs[arr->aid]; + } + + if (defn) { + /* only one possible definer: */ + for (unsigned i = 0; i < arr->length; i++) + av->arr[i] = defn->arr[i]; + } else if (pred_block) { + /* not the first block, and multiple potential definers: */ + av->phis = ralloc_size(av, arr->length * sizeof(av->phis[0])); + + for (unsigned i = 0; i < arr->length; i++) { + struct ir3_instruction *phi; + + phi = ir3_instr_create2(block, -1, OPC_META_PHI, + 1 + ctx->impl->num_blocks); + ir3_reg_create(phi, 0, 0); /* dst */ + + /* phi's should go at head of block: */ + list_delinit(&phi->node); + list_add(&phi->node, &block->instr_list); + + av->phis[i] = av->arr[i] = phi; + } + } else { + /* Some shaders end up reading array elements without + * first writing.. 
so initialize things to prevent null + * instr ptrs later: + */ + for (unsigned i = 0; i < arr->length; i++) + av->arr[i] = create_immed(block, 0); + } + + bd->arrs[arr->aid] = av; + } + + return bd->arrs[arr->aid]; +} + +static void +add_array_phi_srcs(struct ir3_compile *ctx, nir_block *nblock, + struct ir3_array_value *av, BITSET_WORD *visited) +{ + struct ir3_block *block; + struct ir3_nir_block_data *bd; + + if (BITSET_TEST(visited, nblock->index)) + return; + + BITSET_SET(visited, nblock->index); + + block = get_block(ctx, nblock); + bd = block->bd; + + if (bd && bd->arrs[av->aid]) { + struct ir3_array_value *dav = bd->arrs[av->aid]; + for (unsigned i = 0; i < av->length; i++) { + ir3_reg_create(av->phis[i], 0, IR3_REG_SSA)->instr = + dav->arr[i]; + } + } else { + /* didn't find defn, recurse predecessors: */ + struct set_entry *entry; + set_foreach(nblock->predecessors, entry) { + add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited); + } + } +} + +static void +resolve_array_phis(struct ir3_compile *ctx, struct ir3_block *block) +{ + struct ir3_nir_block_data *bd = block->bd; + unsigned bitset_words = BITSET_WORDS(ctx->impl->num_blocks); + + if (!bd) + return; + + /* TODO use nir dom_frontier to help us with this? 
*/ + + for (unsigned i = 1; i <= ctx->num_arrays; i++) { + struct ir3_array_value *av = bd->arrs[i]; + BITSET_WORD visited[bitset_words]; + struct set_entry *entry; + + if (!(av && av->phis)) + continue; + + memset(visited, 0, sizeof(visited)); + set_foreach(block->nblock->predecessors, entry) { + add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited); + } + } } /* allocate a n element value array (to be populated by caller) and @@ -416,6 +601,22 @@ get_addr(struct ir3_compile *ctx, struct ir3_instruction *src) return addr; } +static struct ir3_instruction * +get_predicate(struct ir3_compile *ctx, struct ir3_instruction *src) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *cond; + + /* NOTE: only cmps.*.* can write p0.x: */ + cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0); + cond->cat2.condition = IR3_COND_NE; + + /* condition always goes in predicate register: */ + cond->regs[0]->num = regid(REG_P0, 0); + + return cond; +} + static struct ir3_instruction * create_uniform(struct ir3_compile *ctx, unsigned n) { @@ -1029,7 +1230,7 @@ emit_intrinisic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr, { nir_deref_var *dvar = intr->variables[0]; nir_deref_array *darr = nir_deref_as_array(dvar->deref.child); - struct ir3_array *arr = get_var(ctx, dvar->var); + struct ir3_array_value *arr = get_var(ctx, dvar->var); compile_assert(ctx, dvar->deref.child && (dvar->deref.child->deref_type == nir_deref_type_array)); @@ -1069,7 +1270,7 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr) { nir_deref_var *dvar = intr->variables[0]; nir_deref_array *darr = nir_deref_as_array(dvar->deref.child); - struct ir3_array *arr = get_var(ctx, dvar->var); + struct ir3_array_value *arr = get_var(ctx, dvar->var); struct ir3_instruction **src; compile_assert(ctx, dvar->deref.child && @@ -1245,6 +1446,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) cond = create_immed(b, 1); } + /* NOTE: only cmps.*.* 
can write p0.x: */ cond = ir3_CMPS_S(b, cond, 0, create_immed(b, 0), 0); cond->cat2.condition = IR3_COND_NE; @@ -1557,6 +1759,71 @@ emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex) } } +static void +emit_phi(struct ir3_compile *ctx, nir_phi_instr *nphi) +{ + struct ir3_instruction *phi, **dst; + + /* NOTE: phi's should be lowered to scalar at this point */ + compile_assert(ctx, nphi->dest.ssa.num_components == 1); + + dst = get_dst(ctx, &nphi->dest, 1); + + phi = ir3_instr_create2(ctx->block, -1, OPC_META_PHI, + 1 + exec_list_length(&nphi->srcs)); + ir3_reg_create(phi, 0, 0); /* dst */ + phi->phi.nphi = nphi; + + dst[0] = phi; +} + +/* phi instructions are left partially constructed. We don't resolve + * their srcs until the end of the block, since (eg. loops) one of + * the phi's srcs might be defined after the phi due to back edges in + * the CFG. + */ +static void +resolve_phis(struct ir3_compile *ctx, struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + nir_phi_instr *nphi; + + /* phi's only come at start of block: */ + if (!(is_meta(instr) && (instr->opc == OPC_META_PHI))) + break; + + if (!instr->phi.nphi) + break; + + nphi = instr->phi.nphi; + instr->phi.nphi = NULL; + + foreach_list_typed(nir_phi_src, nsrc, node, &nphi->srcs) { + struct ir3_instruction *src = get_src(ctx, &nsrc->src)[0]; + ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; + } + } + + resolve_array_phis(ctx, block); +} + +static void +emit_jump(struct ir3_compile *ctx, nir_jump_instr *jump) +{ + switch (jump->type) { + case nir_jump_break: + case nir_jump_continue: + /* I *think* we can simply just ignore this, and use the + * successor block link to figure out where we need to + * jump to for break/continue + */ + break; + default: + compile_error(ctx, "Unhandled NIR jump type: %d\n", jump->type); + break; + } +} + static void emit_instr(struct ir3_compile *ctx, nir_instr *instr) { @@ -1590,44 +1857,111 @@ 
emit_instr(struct ir3_compile *ctx, nir_instr *instr) } break; } - case nir_instr_type_call: - case nir_instr_type_jump: case nir_instr_type_phi: + emit_phi(ctx, nir_instr_as_phi(instr)); + break; + case nir_instr_type_jump: + emit_jump(ctx, nir_instr_as_jump(instr)); + break; + case nir_instr_type_call: case nir_instr_type_parallel_copy: compile_error(ctx, "Unhandled NIR instruction type: %d\n", instr->type); break; } } -static void -emit_block(struct ir3_compile *ctx, nir_block *block) +static struct ir3_block * +get_block(struct ir3_compile *ctx, nir_block *nblock) { - nir_foreach_instr(block, instr) { + struct ir3_block *block; + struct hash_entry *entry; + entry = _mesa_hash_table_search(ctx->block_ht, nblock); + if (entry) + return entry->data; + + block = ir3_block_create(ctx->ir); + block->nblock = nblock; + _mesa_hash_table_insert(ctx->block_ht, nblock, block); + + return block; +} + +static void +emit_block(struct ir3_compile *ctx, nir_block *nblock) +{ + struct ir3_block *block = get_block(ctx, nblock); + + for (int i = 0; i < ARRAY_SIZE(block->successors); i++) { + if (nblock->successors[i]) { + block->successors[i] = + get_block(ctx, nblock->successors[i]); + } + } + + ctx->block = block; + list_addtail(&block->node, &ctx->ir->block_list); + + nir_foreach_instr(nblock, instr) { emit_instr(ctx, instr); if (ctx->error) return; } } +static void emit_cf_list(struct ir3_compile *ctx, struct exec_list *list); + static void -emit_function(struct ir3_compile *ctx, nir_function_impl *impl) +emit_if(struct ir3_compile *ctx, nir_if *nif) { - foreach_list_typed(nir_cf_node, node, node, &impl->body) { + struct ir3_instruction *condition = get_src(ctx, &nif->condition)[0]; + + ctx->block->condition = + get_predicate(ctx, ir3_b2n(condition->block, condition)); + + emit_cf_list(ctx, &nif->then_list); + emit_cf_list(ctx, &nif->else_list); +} + +static void +emit_loop(struct ir3_compile *ctx, nir_loop *nloop) +{ + emit_cf_list(ctx, &nloop->body); +} + +static void 
+emit_cf_list(struct ir3_compile *ctx, struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, node, node, list) { switch (node->type) { case nir_cf_node_block: emit_block(ctx, nir_cf_node_as_block(node)); break; case nir_cf_node_if: + emit_if(ctx, nir_cf_node_as_if(node)); + break; case nir_cf_node_loop: + emit_loop(ctx, nir_cf_node_as_loop(node)); + break; case nir_cf_node_function: compile_error(ctx, "TODO\n"); break; } - if (ctx->error) - return; } } +static void +emit_function(struct ir3_compile *ctx, nir_function_impl *impl) +{ + emit_cf_list(ctx, &impl->body); + emit_block(ctx, impl->end_block); + + /* at this point, we should have a single empty block, + * into which we emit the 'end' instruction. + */ + compile_assert(ctx, list_empty(&ctx->block->instr_list)); + ir3_END(ctx->block); +} + static void setup_input(struct ir3_compile *ctx, nir_variable *in) { @@ -1787,8 +2121,19 @@ setup_output(struct ir3_compile *ctx, nir_variable *out) static void emit_instructions(struct ir3_compile *ctx) { - unsigned ninputs = exec_list_length(&ctx->s->inputs) * 4; - unsigned noutputs = exec_list_length(&ctx->s->outputs) * 4; + unsigned ninputs, noutputs; + nir_function_impl *fxn = NULL; + + /* Find the main function: */ + nir_foreach_overload(ctx->s, overload) { + compile_assert(ctx, strcmp(overload->function->name, "main") == 0); + compile_assert(ctx, overload->impl); + fxn = overload->impl; + break; + } + + ninputs = exec_list_length(&ctx->s->inputs) * 4; + noutputs = exec_list_length(&ctx->s->outputs) * 4; /* we need to allocate big enough outputs array so that * we can stuff the kill's at the end. 
Likewise for vtx @@ -1801,8 +2146,11 @@ emit_instructions(struct ir3_compile *ctx) } ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs); - ctx->block = ir3_block_create(ctx->ir); - ctx->ir->block = ctx->block; + + /* Create inputs in first block: */ + ctx->block = get_block(ctx, fxn->start_block); + ctx->in_block = ctx->block; + list_addtail(&ctx->block->node, &ctx->ir->block_list); if (ctx->so->type == SHADER_FRAGMENT) { ctx->ir->noutputs -= ARRAY_SIZE(ctx->kill); @@ -1838,13 +2186,12 @@ emit_instructions(struct ir3_compile *ctx) declare_var(ctx, var); } - /* Find the main function and emit the body: */ - nir_foreach_overload(ctx->s, overload) { - compile_assert(ctx, strcmp(overload->function->name, "main") == 0); - compile_assert(ctx, overload->impl); - emit_function(ctx, overload->impl); - if (ctx->error) - return; + /* And emit the body: */ + ctx->impl = fxn; + emit_function(ctx, fxn); + + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + resolve_phis(ctx, block); } } @@ -1906,13 +2253,13 @@ fixup_frag_inputs(struct ir3_compile *ctx) so->pos_regid = regid; /* r0.x */ - instr = create_input(ctx->block, NULL, ir->ninputs); + instr = create_input(ctx->in_block, NULL, ir->ninputs); instr->regs[0]->num = regid++; inputs[ir->ninputs++] = instr; ctx->frag_pos->regs[1]->instr = instr; /* r0.y */ - instr = create_input(ctx->block, NULL, ir->ninputs); + instr = create_input(ctx->in_block, NULL, ir->ninputs); instr->regs[0]->num = regid++; inputs[ir->ninputs++] = instr; ctx->frag_pos->regs[2]->instr = instr; @@ -1998,6 +2345,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, out = out->regs[1]->instr; out->regs[0]->flags |= IR3_REG_HALF; } + + if (out->category == 1) { + out->cat1.dst_type = half_type(out->cat1.dst_type); + } } } @@ -2058,6 +2409,11 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, ir3_legalize(ir, &so->has_samp, &max_bary); + if (fd_mesa_debug & FD_DBG_OPTMSGS) { + printf("AFTER LEGALIZE:\n"); + 
ir3_print(ir); + } + /* fixup input/outputs: */ for (i = 0; i < so->outputs_count; i++) { so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index a477bd4b237..8c7c80f7aae 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -54,6 +54,13 @@ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) /* TODO: remove this hack: */ if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO)) return false; + /* TODO: we currently don't handle left/right neighbors + * very well when inserting parallel-copies into phi.. + * to avoid problems don't eliminate a mov coming out + * of phi.. + */ + if (is_meta(src_instr) && (src_instr->opc == OPC_META_PHI)) + return false; return true; } return false; @@ -390,7 +397,7 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags) void ir3_cp(struct ir3 *ir) { - ir3_clear_mark(ir->block->shader); + ir3_clear_mark(ir); for (unsigned i = 0; i < ir->noutputs; i++) { if (ir->outputs[i]) { @@ -400,4 +407,9 @@ ir3_cp(struct ir3 *ir) ir->outputs[i] = out; } } + + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + if (block->condition) + block->condition = instr_cp(block->condition, NULL); + } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 6fc8b1762ff..3a108243479 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -134,6 +134,8 @@ remove_unused_by_block(struct ir3_block *block) { list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) { if (!ir3_instr_check_mark(instr)) { + if (is_flow(instr) && (instr->opc == OPC_END)) + continue; /* mark it, in case it is input, so we can * remove unused inputs: */ @@ -149,13 +151,21 @@ ir3_depth(struct ir3 *ir) { unsigned i; - 
ir3_clear_mark(ir->block->shader); + ir3_clear_mark(ir); for (i = 0; i < ir->noutputs; i++) if (ir->outputs[i]) ir3_instr_depth(ir->outputs[i]); + /* We also need to account for if-condition: */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + if (block->condition) + ir3_instr_depth(block->condition); + } + /* mark un-used instructions: */ - remove_unused_by_block(ir->block); + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + remove_unused_by_block(block); + } /* cleanup unused inputs: */ for (i = 0; i < ir->ninputs; i++) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c index 1fe09cc11e5..70d9b08e019 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_group.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c @@ -116,6 +116,10 @@ restart: conflict = conflicts(instr->cp.left, left) || conflicts(instr->cp.right, right); + /* RA can't yet deal very well w/ group'd phi's: */ + if (is_meta(instr) && (instr->opc == OPC_META_PHI)) + conflict = true; + /* we also can't have an instr twice in the group: */ for (j = i + 1; (j < n) && !conflict; j++) if (ops->get(arr, j) == instr) @@ -226,7 +230,6 @@ find_neighbors(struct ir3 *ir) for (i = 0; i < ir->noutputs; i += 4) group_n(&arr_ops_out, &ir->outputs[i], 4); - for (i = 0; i < ir->noutputs; i++) { if (ir->outputs[i]) { struct ir3_instruction *instr = ir->outputs[i]; @@ -238,6 +241,6 @@ find_neighbors(struct ir3 *ir) void ir3_group(struct ir3 *ir) { - ir3_clear_mark(ir->block->shader); + ir3_clear_mark(ir); find_neighbors(ir); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index 34055f4c612..f4a4223ae17 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -42,15 +42,28 @@ */ struct ir3_legalize_ctx { - struct ir3_block *block; bool has_samp; int max_bary; }; +/* We want to evaluate 
each block from the position of any other + * predecessor block, in order that the flags set are the union + * of all possible program paths. For stopping condition, we + * want to stop when the pair of has + * been visited already. + * + * XXX is that completely true? We could have different needs_xyz + * flags set depending on path leading to pred-block.. we could + * do *most* of this based on chasing src instructions ptrs (and + * following all phi srcs).. except the write-after-read hazzard. + * + * For now we just set ss/sy flag on first instruction on block, + * and handle everything within the block as before. + */ + static void -legalize(struct ir3_legalize_ctx *ctx) +legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) { - struct ir3_block *block = ctx->block; struct ir3_instruction *last_input = NULL; struct ir3_instruction *last_rel = NULL; struct list_head instr_list; @@ -203,6 +216,7 @@ legalize(struct ir3_legalize_ctx *ctx) ir3_reg_create(baryf, regid(0, 0), 0); /* insert the dummy bary.f after last_input: */ + list_delinit(&baryf->node); list_add(&baryf->node, &last_input->node); last_input = baryf; @@ -213,23 +227,177 @@ legalize(struct ir3_legalize_ctx *ctx) if (last_rel) last_rel->flags |= IR3_INSTR_UL; - /* create/add 'end' instruction: */ - ir3_instr_create(block, 0, OPC_END); - list_first_entry(&block->instr_list, struct ir3_instruction, node) ->flags |= IR3_INSTR_SS | IR3_INSTR_SY; } +/* NOTE: branch instructions are always the last instruction(s) + * in the block. We take advantage of this as we resolve the + * branches, since "if (foo) break;" constructs turn into + * something like: + * + * block3 { + * ... 
+ * 0029:021: mov.s32s32 r62.x, r1.y + * 0082:022: br !p0.x, target=block5 + * 0083:023: br p0.x, target=block4 + * // succs: if _[0029:021: mov.s32s32] block4; else block5; + * } + * block4 { + * 0084:024: jump, target=block6 + * // succs: block6; + * } + * block5 { + * 0085:025: jump, target=block7 + * // succs: block7; + * } + * + * ie. only instruction in block4/block5 is a jump, so when + * resolving branches we can easily detect this by checking + * that the first instruction in the target block is itself + * a jump, and setup the br directly to the jump's target + * (and strip back out the now unreached jump) + * + * TODO sometimes we end up with things like: + * + * br !p0.x, #2 + * br p0.x, #12 + * add.u r0.y, r0.y, 1 + * + * If we swapped the order of the branches, we could drop one. + */ +static struct ir3_block * +resolve_dest_block(struct ir3_block *block) +{ + /* special case for last block: */ + if (!block->successors[0]) + return block; + + /* NOTE that we may or may not have inserted the jump + * in the target block yet, so conditions to resolve + * the dest to the dest block's successor are: + * + * (1) successor[1] == NULL && + * (2) (block-is-empty || only-instr-is-jump) + */ + if (block->successors[1] == NULL) { + if (list_empty(&block->instr_list)) { + return block->successors[0]; + } else if (list_length(&block->instr_list) == 1) { + struct ir3_instruction *instr = list_first_entry( + &block->instr_list, struct ir3_instruction, node); + if (is_flow(instr) && (instr->opc == OPC_JUMP)) + return block->successors[0]; + } + } + return block; +} + +static bool +resolve_jump(struct ir3_instruction *instr) +{ + struct ir3_block *tblock = + resolve_dest_block(instr->cat0.target); + struct ir3_instruction *target; + + if (tblock != instr->cat0.target) { + list_delinit(&instr->cat0.target->node); + instr->cat0.target = tblock; + return true; + } + + target = list_first_entry(&tblock->instr_list, + struct ir3_instruction, node); + + if ((!target) || 
(target->ip == (instr->ip + 1))) { + list_delinit(&instr->node); + return true; + } else { + instr->cat0.immed = + (int)target->ip - (int)instr->ip; + } + return false; +} + +/* resolve jumps, removing jumps/branches to immediately following + * instruction which we end up with from earlier stages. Since + * removing an instruction can invalidate earlier instruction's + * branch offsets, we need to do this iteratively until no more + * branches are removed. + */ +static bool +resolve_jumps(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) + if (is_flow(instr) && instr->cat0.target) + if (resolve_jump(instr)) + return true; + + return false; +} + +/* we want to mark points where divergent flow control re-converges + * with (jp) flags. For now, since we don't do any optimization for + * things that start out as a 'do {} while()', re-convergence points + * will always be a branch or jump target. Note that this is overly + * conservative, since unconditional jump targets are not convergence + * points, we are just assuming that the other path to reach the jump + * target was divergent. If we were clever enough to optimize the + * jump at end of a loop back to a conditional branch into a single + * conditional branch, ie. like: + * + * add.f r1.w, r0.x, (neg)(r)c2.x <= loop start + * mul.f r1.z, r1.z, r0.x + * mul.f r1.y, r1.y, r0.x + * mul.f r0.z, r1.x, r0.x + * mul.f r0.w, r0.y, r0.x + * cmps.f.ge r0.x, (r)c2.y, (r)r1.w + * add.s r0.x, (r)r0.x, (r)-1 + * sel.f32 r0.x, (r)c3.y, (r)r0.x, c3.x + * cmps.f.eq p0.x, r0.x, c3.y + * mov.f32f32 r0.x, r1.w + * mov.f32f32 r0.y, r0.w + * mov.f32f32 r1.x, r0.z + * (rpt2)nop + * br !p0.x, #-13 + * (jp)mul.f r0.x, c263.y, r1.y + * + * Then we'd have to be more clever, as the convergence point is no + * longer a branch or jump target. 
+ */ +static void +mark_convergence_points(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + if (is_flow(instr) && instr->cat0.target) { + struct ir3_instruction *target = + list_first_entry(&instr->cat0.target->instr_list, + struct ir3_instruction, node); + target->flags |= IR3_INSTR_JP; + } + } + } +} + void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary) { struct ir3_legalize_ctx ctx = { - .block = ir->block, .max_bary = -1, }; - legalize(&ctx); + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + legalize_block(&ctx, block); + } *has_samp = ctx.has_samp; *max_bary = ctx.max_bary; + + do { + ir3_count_instructions(ir); + } while(resolve_jumps(ir)); + + mark_convergence_points(ir); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c index 965c834b8aa..f377982dd5e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_print.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c @@ -137,6 +137,16 @@ tab(int lvl) printf("\t"); } +static uint32_t +block_id(struct ir3_block *block) +{ +#ifdef DEBUG + return block->serialno; +#else + return (uint32_t)(uint64_t)block; +#endif +} + static void print_instr(struct ir3_instruction *instr, int lvl) { @@ -173,6 +183,14 @@ print_instr(struct ir3_instruction *instr, int lvl) } } + if (is_flow(instr) && instr->cat0.target) { + /* the predicate register src is implied: */ + if (instr->opc == OPC_BR) { + printf(" %sp0.x", instr->cat0.inv ? "!" 
: ""); + } + printf(", target=block%u", block_id(instr->cat0.target)); + } + printf("\n"); } @@ -184,19 +202,31 @@ void ir3_print_instr(struct ir3_instruction *instr) static void print_block(struct ir3_block *block, int lvl) { - tab(lvl); printf("block {\n"); + tab(lvl); printf("block%u {\n", block_id(block)); list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { print_instr(instr, lvl+1); } + if (block->successors[1]) { + /* leading into if/else: */ + tab(lvl+1); + printf("/* succs: if _["); + print_instr_name(block->condition); + printf("] block%u; else block%u; */\n", + block_id(block->successors[0]), + block_id(block->successors[1])); + } else if (block->successors[0]) { + tab(lvl+1); + printf("/* succs: block%u; */\n", + block_id(block->successors[0])); + } tab(lvl); printf("}\n"); } void ir3_print(struct ir3 *ir) { - struct ir3_block *block = ir->block; - - print_block(block, 0); + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) + print_block(block, 0); for (unsigned i = 0; i < ir->noutputs; i++) { if (!ir->outputs[i]) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 394c63f646d..359cd9a0d5d 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -29,6 +29,7 @@ #include "util/u_math.h" #include "util/register_allocate.h" #include "util/ralloc.h" +#include "util/bitset.h" #include "ir3.h" #include "ir3_compiler.h" @@ -255,6 +256,14 @@ struct ir3_ra_ctx { unsigned *def, *use; /* def/use table */ }; +/* additional block-data (per-block) */ +struct ir3_ra_block_data { + BITSET_WORD *def; /* variables defined before used in block */ + BITSET_WORD *use; /* variables used before defined in block */ + BITSET_WORD *livein; /* which defs reach entry point of block */ + BITSET_WORD *liveout; /* which defs reach exit point of block */ +}; + static bool is_half(struct ir3_instruction *instr) { @@ -369,7 +378,39 @@ 
get_definer(struct ir3_instruction *instr, int *sz, int *off) *sz = util_last_bit(instr->regs[0]->wrmask); } *off = 0; - return instr; + d = instr; + } + + if (d->regs[0]->flags & IR3_REG_PHI_SRC) { + struct ir3_instruction *phi = d->regs[0]->instr; + struct ir3_instruction *dd; + int dsz, doff; + + dd = get_definer(phi, &dsz, &doff); + + *sz = MAX2(*sz, dsz); + *off = doff; + + if (dd->ip < d->ip) { + d = dd; + } + } + + if (is_meta(d) && (d->opc == OPC_META_PHI)) { + /* we have already inserted parallel-copies into + * the phi, so we don't need to chase definers + */ + struct ir3_register *src; + + /* note: don't use foreach_ssa_src as this gets called once + * while assigning regs (which clears SSA flag) + */ + foreach_src(src, d) { + if (!src->instr) + continue; + if (src->instr->ip < d->ip) + d = src->instr; + } } if (is_meta(d) && (d->opc == OPC_META_FO)) { @@ -395,14 +436,12 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off) static void ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - instr->ip = ctx->instr_cnt++; - } - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { struct ir3_instruction *defn; int cls, sz, off; + ctx->instr_cnt++; + if (instr->regs_count == 0) continue; @@ -431,8 +470,11 @@ static void ra_init(struct ir3_ra_ctx *ctx) { ir3_clear_mark(ctx->ir); + ir3_count_instructions(ctx->ir); - ra_block_name_instructions(ctx, ctx->ir->block); + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + ra_block_name_instructions(ctx, block); + } /* figure out the base register name for each class. 
The * actual ra name is class_base[cls] + instr->name; @@ -448,6 +490,16 @@ ra_init(struct ir3_ra_ctx *ctx) ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); } +static unsigned +ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn) +{ + unsigned name; + debug_assert(cls >= 0); + name = ctx->class_base[cls] + defn->name; + debug_assert(name < ctx->alloc_count); + return name; +} + static void ra_destroy(struct ir3_ra_ctx *ctx) { @@ -457,6 +509,18 @@ ra_destroy(struct ir3_ra_ctx *ctx) static void ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) { + struct ir3_ra_block_data *bd; + unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); + + bd = rzalloc(ctx->g, struct ir3_ra_block_data); + + bd->def = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd->use = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd->livein = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd->liveout = rzalloc_array(bd, BITSET_WORD, bitset_words); + + block->bd = bd; + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { struct ir3_instruction *src; @@ -474,7 +538,15 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) * fanin: used to collect values from lower class and assemble * them together into a higher class, for example arguments * to texture sample instructions; We consider these to be - * defined at the fanin node. + * defined at the earliest fanin source. + * + * phi: used to merge values from different flow control paths + * to the same reg. Consider defined at earliest phi src, + * and update all the other phi src's (which may come later + * in the program) as users to extend the var's live range. + * + * Most of this, other than phi, is completely handled in the + * get_definer() helper. * * In either case, we trace the instruction back to the original * definer and consider that as the def/use ip. 
@@ -491,11 +563,15 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) */ cls = size_to_class(sz, is_half(defn)); if (cls >= 0) { - unsigned name = ctx->class_base[cls] + defn->name; + unsigned name = ra_name(ctx, cls, defn); + ctx->def[name] = defn->ip; ctx->use[name] = defn->ip; - debug_assert(name < ctx->alloc_count); + /* since we are in SSA at this point: */ + debug_assert(!BITSET_TEST(bd->use, name)); + + BITSET_SET(bd->def, name); if (is_half(defn)) { ra_set_node_class(ctx->g, name, @@ -504,6 +580,24 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) ra_set_node_class(ctx->g, name, ctx->set->classes[cls]); } + + /* extend the live range for phi srcs, which may come + * from the bottom of the loop + */ + if (defn->regs[0]->flags & IR3_REG_PHI_SRC) { + struct ir3_instruction *phi = defn->regs[0]->instr; + foreach_ssa_src(src, phi) { + /* if src is after phi, then we need to extend + * the liverange to the end of src's block: + */ + if (src->ip > phi->ip) { + struct ir3_instruction *last = + list_last_entry(&src->block->instr_list, + struct ir3_instruction, node); + ctx->use[name] = MAX2(ctx->use[name], last->ip); + } + } + } } } } @@ -516,20 +610,94 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) srcdefn = get_definer(src, &sz, &off); cls = size_to_class(sz, is_half(srcdefn)); if (cls >= 0) { - unsigned name = ctx->class_base[cls] + srcdefn->name; - ctx->use[name] = instr->ip; + unsigned name = ra_name(ctx, cls, srcdefn); + ctx->use[name] = MAX2(ctx->use[name], instr->ip); + if (!BITSET_TEST(bd->def, name)) + BITSET_SET(bd->use, name); } } } } } +static bool +ra_compute_livein_liveout(struct ir3_ra_ctx *ctx) +{ + unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); + bool progress = false; + + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + struct ir3_ra_block_data *bd = block->bd; + + /* update livein: */ + for (unsigned i = 0; i < 
bitset_words; i++) { + BITSET_WORD new_livein = + (bd->use[i] | (bd->liveout[i] & ~bd->def[i])); + + if (new_livein & ~bd->livein[i]) { + bd->livein[i] |= new_livein; + progress = true; + } + } + + /* update liveout: */ + for (unsigned j = 0; j < ARRAY_SIZE(block->successors); j++) { + struct ir3_block *succ = block->successors[j]; + struct ir3_ra_block_data *succ_bd; + + if (!succ) + continue; + + succ_bd = succ->bd; + + for (unsigned i = 0; i < bitset_words; i++) { + BITSET_WORD new_liveout = + (succ_bd->livein[i] & ~bd->liveout[i]); + + if (new_liveout) { + bd->liveout[i] |= new_liveout; + progress = true; + } + } + } + } + + return progress; +} + static void ra_add_interference(struct ir3_ra_ctx *ctx) { struct ir3 *ir = ctx->ir; - ra_block_compute_live_ranges(ctx, ctx->ir->block); + /* compute live ranges (use/def) on a block level, also updating + * block's def/use bitmasks (used below to calculate per-block + * livein/liveout): + */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + ra_block_compute_live_ranges(ctx, block); + } + + /* update per-block livein/liveout: */ + while (ra_compute_livein_liveout(ctx)) {} + + /* extend start/end ranges based on livein/liveout info from cfg: */ + unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + struct ir3_ra_block_data *bd = block->bd; + + for (unsigned i = 0; i < bitset_words; i++) { + if (BITSET_TEST(bd->livein, i)) { + ctx->def[i] = MIN2(ctx->def[i], block->start_ip); + ctx->use[i] = MAX2(ctx->use[i], block->start_ip); + } + + if (BITSET_TEST(bd->liveout, i)) { + ctx->def[i] = MIN2(ctx->def[i], block->end_ip); + ctx->use[i] = MAX2(ctx->use[i], block->end_ip); + } + } + } /* need to fix things up to keep outputs live: */ for (unsigned i = 0; i < ir->noutputs; i++) { @@ -540,7 +708,7 @@ ra_add_interference(struct ir3_ra_ctx *ctx) defn = get_definer(instr, &sz, &off); cls = size_to_class(sz, is_half(defn)); if (cls 
>= 0) { - unsigned name = ctx->class_base[cls] + defn->name; + unsigned name = ra_name(ctx, cls, defn); ctx->use[name] = ctx->instr_cnt; } } @@ -555,23 +723,6 @@ ra_add_interference(struct ir3_ra_ctx *ctx) } } -static type_t half_type(type_t type) -{ - switch (type) { - case TYPE_F32: return TYPE_F16; - case TYPE_U32: return TYPE_U16; - case TYPE_S32: return TYPE_S16; - /* instructions may already be fixed up: */ - case TYPE_F16: - case TYPE_U16: - case TYPE_S16: - return type; - default: - assert(0); - return ~0; - } -} - /* some instructions need fix-up if dst register is half precision: */ static void fixup_half_instr_dst(struct ir3_instruction *instr) { @@ -633,7 +784,7 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg, defn = get_definer(instr, &sz, &off); cls = size_to_class(sz, is_half(defn)); if (cls >= 0) { - unsigned name = ctx->class_base[cls] + defn->name; + unsigned name = ra_name(ctx, cls, defn); unsigned r = ra_get_node_reg(ctx->g, name); unsigned num = ctx->set->ra_reg_to_gpr[r] + off; @@ -641,7 +792,7 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg, num += reg->offset; reg->num = num; - reg->flags &= ~IR3_REG_SSA; + reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC); if (is_half(defn)) reg->flags |= IR3_REG_HALF; @@ -686,8 +837,8 @@ ra_alloc(struct ir3_ra_ctx *ctx) unsigned i = 0, j; if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) { struct ir3_instruction *instr = ir->inputs[i]; - unsigned cls = size_to_class(1, true); - unsigned name = ctx->class_base[cls] + instr->name; + int cls = size_to_class(1, true); + unsigned name = ra_name(ctx, cls, instr); unsigned reg = ctx->set->gpr_to_ra_reg[cls][0]; /* if we have frag_face, it gets hr0.x */ @@ -706,8 +857,7 @@ ra_alloc(struct ir3_ra_ctx *ctx) unsigned name, reg; cls = size_to_class(sz, is_half(defn)); - debug_assert(cls >= 0); - name = ctx->class_base[cls] + defn->name; + name = ra_name(ctx, cls, defn); reg = ctx->set->gpr_to_ra_reg[cls][j]; ra_set_node_reg(ctx->g, 
name, reg); @@ -720,7 +870,9 @@ ra_alloc(struct ir3_ra_ctx *ctx) if (!ra_allocate(ctx->g)) return -1; - ra_block_alloc(ctx, ctx->ir->block); + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + ra_block_alloc(ctx, block); + } return 0; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index 0d404a83583..49a4426d163 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -205,6 +205,16 @@ instr_eligibility(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, struct ir3_instruction *src; unsigned delay = 0; + /* Phi instructions can have a dependency on something not + * scheduled yet (for ex, loops). But OTOH we don't really + * care. By definition phi's should appear at the top of + * the block, and it's sources should be values from the + * previously executing block, so they are always ready to + * be scheduled: + */ + if (is_meta(instr) && (instr->opc == OPC_META_PHI)) + return 0; + foreach_ssa_src(src, instr) { /* if dependency not scheduled, we aren't ready yet: */ if (!is_scheduled(src)) @@ -422,13 +432,87 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) } } } + + /* And lastly, insert branch/jump instructions to take us to + * the next block. Later we'll strip back out the branches + * that simply jump to next instruction. 
+ */ + if (block->successors[1]) { + /* if/else, conditional branches to "then" or "else": */ + struct ir3_instruction *br; + unsigned delay = 6; + + debug_assert(ctx->pred); + debug_assert(block->condition); + + delay -= distance(ctx, ctx->pred, delay); + + while (delay > 0) { + ir3_NOP(block); + delay--; + } + + /* create "else" branch first (since "then" block should + * frequently/always end up being a fall-thru): + */ + br = ir3_BR(block); + br->cat0.inv = true; + br->cat0.target = block->successors[1]; + + /* NOTE: we have to hard code delay of 6 above, since + * we want to insert the nop's before constructing the + * branch. Throw in an assert so we notice if this + * ever breaks on future generation: + */ + debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6); + + br = ir3_BR(block); + br->cat0.target = block->successors[0]; + + } else if (block->successors[0]) { + /* otherwise unconditional jump to next block: */ + struct ir3_instruction *jmp; + + jmp = ir3_JUMP(block); + jmp->cat0.target = block->successors[0]; + } + + /* NOTE: if we kept track of the predecessors, we could do a better + * job w/ (jp) flags.. every node w/ > predecessor is a join point. + * Note that as we eliminate blocks which contain only an unconditional + * jump we probably need to propagate (jp) flag.. 
+ */ +} + +/* this is needed to ensure later RA stage succeeds: */ +static void +sched_insert_parallel_copies(struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + if (is_meta(instr) && (instr->opc == OPC_META_PHI)) { + struct ir3_register *reg; + foreach_src(reg, instr) { + struct ir3_instruction *src = reg->instr; + struct ir3_instruction *mov = + ir3_MOV(src->block, src, TYPE_U32); + mov->regs[0]->flags |= IR3_REG_PHI_SRC; + mov->regs[0]->instr = instr; + reg->instr = mov; + } + } + } } int ir3_sched(struct ir3 *ir) { struct ir3_sched_ctx ctx = {0}; - ir3_clear_mark(ir->block->shader); - sched_block(&ctx, ir->block); + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + sched_insert_parallel_copies(block); + } + ir3_clear_mark(ir); + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + sched_block(&ctx, block); + } if (ctx.error) return -1; return 0; From 0f008082b184072159e5aedc7fc103efba8740ed Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 8 Jun 2015 14:23:49 -0400 Subject: [PATCH 767/834] freedreno: remove int sampler shader variants We get this information from NIR (which gets it from sview decl in tgsi when translating from tgsi), so no need to maintain shader variants for this. 
Signed-off-by: Rob Clark --- .../drivers/freedreno/a3xx/fd3_context.h | 3 -- src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 11 ++---- .../drivers/freedreno/a3xx/fd3_texture.c | 35 +------------------ .../drivers/freedreno/a4xx/fd4_context.h | 3 -- src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 8 ++--- .../drivers/freedreno/a4xx/fd4_texture.c | 34 +----------------- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 13 ------- .../drivers/freedreno/ir3/ir3_shader.h | 4 --- 8 files changed, 7 insertions(+), 104 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h index 4e3f521716e..77e4605e550 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h @@ -105,9 +105,6 @@ struct fd3_context { */ unsigned fsaturate_s, fsaturate_t, fsaturate_r; - /* bitmask of integer texture samplers */ - uint16_t vinteger_s, finteger_s; - /* some state changes require a different shader variant. Keep * track of this so we know when we need to re-emit shader state * due to variant change. 
See fixup_shader_state() diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index b522cf86695..b5838b58eb2 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -104,14 +104,12 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) if (last_key->has_per_samp || key->has_per_samp) { if ((last_key->vsaturate_s != key->vsaturate_s) || (last_key->vsaturate_t != key->vsaturate_t) || - (last_key->vsaturate_r != key->vsaturate_r) || - (last_key->vinteger_s != key->vinteger_s)) + (last_key->vsaturate_r != key->vsaturate_r)) ctx->prog.dirty |= FD_SHADER_DIRTY_VP; if ((last_key->fsaturate_s != key->fsaturate_s) || (last_key->fsaturate_t != key->fsaturate_t) || - (last_key->fsaturate_r != key->fsaturate_r) || - (last_key->finteger_s != key->finteger_s)) + (last_key->fsaturate_r != key->fsaturate_r)) ctx->prog.dirty |= FD_SHADER_DIRTY_FP; } @@ -140,16 +138,13 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), - .has_per_samp = (fd3_ctx->fsaturate || fd3_ctx->vsaturate || - fd3_ctx->vinteger_s || fd3_ctx->finteger_s), + .has_per_samp = (fd3_ctx->fsaturate || fd3_ctx->vsaturate), .vsaturate_s = fd3_ctx->vsaturate_s, .vsaturate_t = fd3_ctx->vsaturate_t, .vsaturate_r = fd3_ctx->vsaturate_r, .fsaturate_s = fd3_ctx->fsaturate_s, .fsaturate_t = fd3_ctx->fsaturate_t, .fsaturate_r = fd3_ctx->fsaturate_r, - .vinteger_s = fd3_ctx->vinteger_s, - .finteger_s = fd3_ctx->finteger_s, }, .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, .sprite_coord_enable = ctx->rasterizer ? 
ctx->rasterizer->sprite_coord_enable : 0, diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c index 6f44ee3c08e..a278bf5c603 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c @@ -263,44 +263,11 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, return &so->base; } -static void -fd3_set_sampler_views(struct pipe_context *pctx, unsigned shader, - unsigned start, unsigned nr, - struct pipe_sampler_view **views) -{ - struct fd_context *ctx = fd_context(pctx); - struct fd3_context *fd3_ctx = fd3_context(ctx); - struct fd_texture_stateobj *tex; - uint16_t integer_s = 0, *ptr; - int i; - - fd_set_sampler_views(pctx, shader, start, nr, views); - - switch (shader) { - case PIPE_SHADER_FRAGMENT: - tex = &ctx->fragtex; - ptr = &fd3_ctx->finteger_s; - break; - case PIPE_SHADER_VERTEX: - tex = &ctx->verttex; - ptr = &fd3_ctx->vinteger_s; - break; - default: - return; - } - - for (i = 0; i < tex->num_textures; i++) - if (util_format_is_pure_integer(tex->textures[i]->format)) - integer_s |= 1 << i; - *ptr = integer_s; -} - - void fd3_texture_init(struct pipe_context *pctx) { pctx->create_sampler_state = fd3_sampler_state_create; pctx->bind_sampler_states = fd3_sampler_states_bind; pctx->create_sampler_view = fd3_sampler_view_create; - pctx->set_sampler_views = fd3_set_sampler_views; + pctx->set_sampler_views = fd_set_sampler_views; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h index 384602a2e4f..53e1bf6a2e6 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h @@ -83,9 +83,6 @@ struct fd4_context { */ uint16_t fsaturate_s, fsaturate_t, fsaturate_r; - /* bitmask of integer texture samplers */ - uint16_t vinteger_s, finteger_s; - /* some state changes require a different shader variant. 
Keep * track of this so we know when we need to re-emit shader state * due to variant change. See fixup_shader_state() diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index ae407f753fe..de5a306af60 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -82,8 +82,7 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) if (last_key->has_per_samp || key->has_per_samp) { if ((last_key->vsaturate_s != key->vsaturate_s) || (last_key->vsaturate_t != key->vsaturate_t) || - (last_key->vsaturate_r != key->vsaturate_r) || - (last_key->vinteger_s != key->vinteger_s)) + (last_key->vsaturate_r != key->vsaturate_r)) ctx->prog.dirty |= FD_SHADER_DIRTY_VP; if ((last_key->fsaturate_s != key->fsaturate_s) || @@ -122,16 +121,13 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. 
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), - .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate || - fd4_ctx->vinteger_s || fd4_ctx->finteger_s), + .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate), .vsaturate_s = fd4_ctx->vsaturate_s, .vsaturate_t = fd4_ctx->vsaturate_t, .vsaturate_r = fd4_ctx->vsaturate_r, .fsaturate_s = fd4_ctx->fsaturate_s, .fsaturate_t = fd4_ctx->fsaturate_t, .fsaturate_r = fd4_ctx->fsaturate_r, - .vinteger_s = fd4_ctx->vinteger_s, - .finteger_s = fd4_ctx->finteger_s, }, .format = fd4_emit_format(pfb->cbufs[0]), .pformat = pipe_surface_format(pfb->cbufs[0]), diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c index ff1ff8f0d34..6ba25d0816d 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c @@ -205,43 +205,11 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, return &so->base; } -static void -fd4_set_sampler_views(struct pipe_context *pctx, unsigned shader, - unsigned start, unsigned nr, struct pipe_sampler_view **views) -{ - struct fd_context *ctx = fd_context(pctx); - struct fd4_context *fd4_ctx = fd4_context(ctx); - struct fd_texture_stateobj *tex; - uint16_t integer_s = 0, *ptr; - int i; - - fd_set_sampler_views(pctx, shader, start, nr, views); - - switch (shader) { - case PIPE_SHADER_FRAGMENT: - tex = &ctx->fragtex; - ptr = &fd4_ctx->finteger_s; - break; - case PIPE_SHADER_VERTEX: - tex = &ctx->verttex; - ptr = &fd4_ctx->vinteger_s; - break; - default: - return; - } - - for (i = 0; i < tex->num_textures; i++) - if (util_format_is_pure_integer(tex->textures[i]->format)) - integer_s |= 1 << i; - - *ptr = integer_s; -} - void fd4_texture_init(struct pipe_context *pctx) { pctx->create_sampler_state = fd4_sampler_state_create; pctx->bind_sampler_states = fd_sampler_states_bind; pctx->create_sampler_view = fd4_sampler_view_create; - pctx->set_sampler_views = 
fd4_set_sampler_views; + pctx->set_sampler_views = fd_set_sampler_views; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 4165e2d6aa7..5c215a55a36 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -57,9 +57,6 @@ struct ir3_compile { struct ir3 *ir; struct ir3_shader_variant *so; - /* bitmask of which samplers are integer: */ - uint16_t integer_s; - struct ir3_block *block; /* the current block */ struct ir3_block *in_block; /* block created for shader inputs */ @@ -234,16 +231,6 @@ compile_init(struct ir3_compiler *compiler, ctx->levels_add_one = true; } - switch (so->type) { - case SHADER_FRAGMENT: - case SHADER_COMPUTE: - ctx->integer_s = so->key.finteger_s; - break; - case SHADER_VERTEX: - ctx->integer_s = so->key.vinteger_s; - break; - } - ctx->compiler = compiler; ctx->ir = so->ir; ctx->so = so; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 8141c5698db..9f1b0769180 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -86,10 +86,6 @@ struct ir3_shader_key { * shader: */ uint16_t fsaturate_s, fsaturate_t, fsaturate_r; - - /* bitmask of sampler which produces integer outputs: - */ - uint16_t vinteger_s, finteger_s; }; static inline bool From 43048c7093c367897fbcbb3ca8580e9c122cd6f6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 8 Jun 2015 14:45:47 -0400 Subject: [PATCH 768/834] freedreno/ir3: only unminify txf coords on a3xx Seems like a4xx gets this right. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 5c215a55a36..a9a8df3bc3b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -109,6 +109,11 @@ struct ir3_compile { */ bool levels_add_one; + /* on a3xx, we need to scale up integer coords for isaml based + * on LoD: + */ + bool unminify_coords; + /* for looking up which system value is which */ unsigned sysval_semantics[8]; @@ -225,10 +230,12 @@ compile_init(struct ir3_compiler *compiler, /* need special handling for "flat" */ ctx->flat_bypass = true; ctx->levels_add_one = false; + ctx->unminify_coords = false; } else { /* no special handling for "flat" */ ctx->flat_bypass = false; ctx->levels_add_one = true; + ctx->unminify_coords = true; } ctx->compiler = compiler; @@ -1592,11 +1599,12 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) tex_info(tex, &flags, &coords); /* scale up integer coords for TXF based on the LOD */ - if (opc == OPC_ISAML) { + if (ctx->unminify_coords && (opc == OPC_ISAML)) { assert(has_lod); for (i = 0; i < coords; i++) coord[i] = ir3_SHL_B(b, coord[i], 0, lod, 0); } + /* * lay out the first argument in the proper order: * - actual coordinates first From 1ee4d51e7a68f8f2dcb52a0e2f9af81fdbe078a2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 12 Jun 2015 14:27:44 -0400 Subject: [PATCH 769/834] freedreno/ir3/nir: add more opcodes Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index a9a8df3bc3b..0c2600b8100 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ 
b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1076,9 +1076,15 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) case nir_op_imax: dst[0] = ir3_MAX_S(b, src[0], 0, src[1], 0); break; + case nir_op_umax: + dst[0] = ir3_MAX_U(b, src[0], 0, src[1], 0); + break; case nir_op_imin: dst[0] = ir3_MIN_S(b, src[0], 0, src[1], 0); break; + case nir_op_umin: + dst[0] = ir3_MIN_U(b, src[0], 0, src[1], 0); + break; case nir_op_imul: /* * dst = (al * bl) + (ah * bl << 16) + (al * bh << 16) @@ -2451,7 +2457,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, out: if (ret) { - ir3_destroy(so->ir); + if (so->ir) + ir3_destroy(so->ir); so->ir = NULL; } compile_free(ctx); From 66a93a0ff9aa402c37aa9d00b4489715d611b496 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 13 Jun 2015 09:14:31 -0400 Subject: [PATCH 770/834] freedreno/ir3: pass sz to split_dest() For query_levels, we generate a getinfo with writemask of (z), which RA will consider as size==3. But we were still generating four fanouts. Which meant that RA would see it as two different register classes, depending on the path to definer. Ie. on the getinfo instruction itself it would see size==3, but when chasing back through the fanouts it would see size==4. Easiest way to solve that is to just generate the chain of neighboring fanouts to have the correct size in the first place. Note: we may eventually want split_dest() to take start/end or wrmask instead, since really we only need size==1. But RA is not clever enough for that, query_levels is not that common, and the other two registers that get allocated are never used so those register slots can be immediately re-used. So bunch of work for probably no real gain. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 10 +++++----- src/gallium/drivers/freedreno/ir3/ir3_ra.c | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 0c2600b8100..48b1d8f3606 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -822,10 +822,10 @@ create_frag_face(struct ir3_compile *ctx, unsigned comp) */ static void split_dest(struct ir3_block *block, struct ir3_instruction **dst, - struct ir3_instruction *src) + struct ir3_instruction *src, unsigned n) { struct ir3_instruction *prev = NULL; - for (int i = 0, j = 0; i < 4; i++) { + for (int i = 0, j = 0; i < n; i++) { struct ir3_instruction *split = ir3_instr_create(block, -1, OPC_META_FO); ir3_reg_create(split, 0, IR3_REG_SSA); @@ -1699,7 +1699,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) create_collect(b, src0, nsrc0), create_collect(b, src1, nsrc1)); - split_dest(b, dst, sam); + split_dest(b, dst, sam, 4); } static void @@ -1716,7 +1716,7 @@ emit_tex_query_levels(struct ir3_compile *ctx, nir_tex_instr *tex) /* even though there is only one component, since it ends * up in .z rather than .x, we need a split_dest() */ - split_dest(b, dst, sam); + split_dest(b, dst, sam, 3); /* The # of levels comes from getinfo.z. We need to add 1 to it, since * the value in TEX_CONST_0 is zero-based. @@ -1744,7 +1744,7 @@ emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex) sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags, tex->sampler_index, tex->sampler_index, lod, NULL); - split_dest(b, dst, sam); + split_dest(b, dst, sam, 4); /* Array size actually ends up in .w rather than .z. 
This doesn't * matter for miplevel 0, but for higher mips the value in z is diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 359cd9a0d5d..e5aba859fab 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -424,6 +424,8 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off) *sz = MAX2(*sz, dsz); + /* Fanouts are grouped, so *off should already be valid */ + d = dd; } From 2bf5a4211ef305d90ca6133ca09c3b79e6088d50 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 22 Jun 2015 13:36:41 +1000 Subject: [PATCH 771/834] r600g: ignore sampler views for now. This fixes a regression in that r600 stopped working when sampler views were pushed. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 07da1676182..af7622e9b34 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -725,6 +725,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_CONSTANT: case TGSI_FILE_SAMPLER: + case TGSI_FILE_SAMPLER_VIEW: case TGSI_FILE_ADDRESS: break; From 57bdcae9e0fbf639014cd375543a8dd356406ac0 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 20 Jun 2015 23:27:08 +0800 Subject: [PATCH 772/834] ilo: add ilo_state_compute Replace gen6_idrt_data with ilo_state_compute, which has a bunch of validations and is now preferred.
--- src/gallium/drivers/ilo/Makefile.sources | 2 + .../drivers/ilo/core/ilo_builder_media.h | 106 +---- .../drivers/ilo/core/ilo_state_compute.c | 435 ++++++++++++++++++ .../drivers/ilo/core/ilo_state_compute.h | 92 ++++ src/gallium/drivers/ilo/ilo_render_dynamic.c | 36 +- src/gallium/drivers/ilo/ilo_render_gen.h | 3 + src/gallium/drivers/ilo/ilo_render_media.c | 3 +- src/gallium/drivers/ilo/ilo_state.h | 1 + 8 files changed, 586 insertions(+), 92 deletions(-) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_compute.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_compute.h diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index e5a0950dc7c..95b6b7a7b16 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -21,6 +21,8 @@ C_SOURCES := \ core/ilo_image.h \ core/ilo_state_cc.c \ core/ilo_state_cc.h \ + core/ilo_state_compute.c \ + core/ilo_state_compute.h \ core/ilo_state_raster.c \ core/ilo_state_raster.h \ core/ilo_state_sampler.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_builder_media.h b/src/gallium/drivers/ilo/core/ilo_builder_media.h index 7fbe6d41635..7197104a23e 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_media.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_media.h @@ -29,57 +29,30 @@ #define ILO_BUILDER_MEDIA_H #include "genhw/genhw.h" -#include "../ilo_shader.h" #include "intel_winsys.h" #include "ilo_core.h" #include "ilo_dev.h" +#include "ilo_state_compute.h" #include "ilo_builder.h" -struct gen6_idrt_data { - const struct ilo_shader_state *cs; - - uint32_t sampler_offset; - uint32_t binding_table_offset; - - unsigned curbe_size; - unsigned thread_group_size; -}; - static inline void gen6_MEDIA_VFE_STATE(struct ilo_builder *builder, - unsigned curbe_alloc, bool use_slm) + const struct ilo_state_compute *compute) { const uint8_t cmd_len = 8; - const unsigned idrt_alloc = - ((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 
64 : 32) * 32; - int max_threads; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); - - max_threads = builder->dev->thread_count; - - curbe_alloc = align(curbe_alloc, 32); - assert(idrt_alloc + curbe_alloc <= builder->dev->urb_size / (use_slm + 1)); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) | (cmd_len - 2); - dw[1] = 0; /* scratch */ - - dw[2] = (max_threads - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT | - 0 << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT | - GEN6_VFE_DW2_RESET_GATEWAY_TIMER | - GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL; - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) - dw[2] |= GEN7_VFE_DW2_GPGPU_MODE; - + /* see compute_set_gen6_MEDIA_VFE_STATE() */ + dw[1] = compute->vfe[0]; + dw[2] = compute->vfe[1]; dw[3] = 0; - - dw[4] = 0 << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT | - (curbe_alloc / 32); - + dw[4] = compute->vfe[2]; dw[5] = 0; dw[6] = 0; dw[7] = 0; @@ -194,8 +167,10 @@ gen7_GPGPU_WALKER(struct ilo_builder *builder, static inline uint32_t gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder, - const struct gen6_idrt_data *data, - int idrt_count) + const struct ilo_state_compute *compute, + const uint32_t *kernel_offsets, + const uint32_t *sampler_offsets, + const uint32_t *binding_table_offsets) { /* * From the Sandy Bridge PRM, volume 2 part 2, page 34: @@ -211,61 +186,26 @@ gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder, * aligned address of the Interface Descriptor data." 
*/ const int state_align = 32; - const int state_len = (32 / 4) * idrt_count; + const int state_len = (32 / 4) * compute->idrt_count; uint32_t state_offset, *dw; int i; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); state_offset = ilo_builder_dynamic_pointer(builder, ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR, state_align, state_len, &dw); - for (i = 0; i < idrt_count; i++) { - const struct gen6_idrt_data *idrt = &data[i]; - const struct ilo_shader_state *cs = idrt->cs; - unsigned sampler_count, bt_size, slm_size; - - sampler_count = - ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT); - assert(sampler_count <= 16); - sampler_count = (sampler_count + 3) / 4; - - bt_size = - ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT); - if (bt_size > 31) - bt_size = 31; - - slm_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE); - - assert(idrt->curbe_size / 32 <= 63); - - dw[0] = ilo_shader_get_kernel_offset(idrt->cs); + for (i = 0; i < compute->idrt_count; i++) { + /* see compute_set_gen6_INTERFACE_DESCRIPTOR_DATA() */ + dw[0] = compute->idrt[i][0] + kernel_offsets[i]; dw[1] = 0; - dw[2] = idrt->sampler_offset | - sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT; - dw[3] = idrt->binding_table_offset | - bt_size << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT; - - dw[4] = (idrt->curbe_size / 32) << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT | - 0 << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { - dw[5] = GEN7_IDRT_DW5_ROUNDING_MODE_RTNE; - - if (slm_size) { - assert(slm_size <= 64 * 1024); - slm_size = util_next_power_of_two((slm_size + 4095) / 4096); - - dw[5] |= GEN7_IDRT_DW5_BARRIER_ENABLE | - slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT | - idrt->thread_group_size << - GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT; - } - } else { - dw[5] = 0; - } - - dw[6] = 0; + dw[2] = compute->idrt[i][1] | + sampler_offsets[i]; + dw[3] = compute->idrt[i][2] | + binding_table_offsets[i]; + dw[4] = 
compute->idrt[i][3]; + dw[5] = compute->idrt[i][4]; + dw[6] = compute->idrt[i][5]; dw[7] = 0; dw += 8; diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c new file mode 100644 index 00000000000..a5fe5e1a6b0 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c @@ -0,0 +1,435 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_compute.h" + +struct compute_urb_configuration { + int idrt_entry_count; + int curbe_entry_count; + + int urb_entry_count; + /* in 256-bit register increments */ + int urb_entry_size; +}; + +static int +get_gen6_rob_entry_count(const struct ilo_dev *dev) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 60: + * + * "ROB has 64KB of storage; 2048 entries." + * + * From the valid ranges of "CURBE Allocation Size", we can also conclude + * that interface entries and CURBE data must be in ROB. And that ROB + * should be 16KB, or 512 entries, on Gen7 GT1. + */ + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + return 2048; + else if (ilo_dev_gen(dev) >= ILO_GEN(7)) + return (dev->gt == 2) ? 2048 : 512; + else + return (dev->gt == 2) ? 2048 : 1024; +} + +static int +get_gen6_idrt_entry_count(const struct ilo_dev *dev) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 21: + * + * "The first 32 URB entries are reserved for the interface + * descriptor..." + * + * From the Haswell PRM, volume 7, page 836: + * + * "The first 64 URB entries are reserved for the interface + * description..." + */ + return (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 64 : 32; +} + +static int +get_gen6_curbe_entry_count(const struct ilo_dev *dev, uint32_t curbe_size) +{ + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 21: + * + * "(CURBE Allocation Size) Specifies the total length allocated for + * CURBE, in 256-bit register increments."
+ */ + const int entry_count = (curbe_size + 31) / 32; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(get_gen6_idrt_entry_count(dev) + entry_count <= + get_gen6_rob_entry_count(dev)); + + return entry_count; +} + +static bool +compute_get_gen6_urb_configuration(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info, + struct compute_urb_configuration *urb) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + urb->idrt_entry_count = get_gen6_idrt_entry_count(dev); + urb->curbe_entry_count = + get_gen6_curbe_entry_count(dev, info->curbe_alloc_size); + + /* + * From the Broadwell PRM, volume 2b, page 451: + * + * "Please note that 0 is not allowed for this field (Number of URB + * Entries)." + */ + urb->urb_entry_count = (ilo_dev_gen(dev) >= ILO_GEN(8)) ? 1 : 0; + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 52: + * + * "(URB Entry Allocation Size) Specifies the length of each URB entry + * used by the unit, in 256-bit register increments - 1." + */ + urb->urb_entry_size = 1; + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 22: + * + * "MEDIA_VFE_STATE specifies the amount of CURBE space, the URB handle + * size and the number of URB handles. The driver must ensure that + * ((URB_handle_size * URB_num_handle) - CURBE - 32) <= + * URB_allocation_in_L3."
+ */ + assert(urb->idrt_entry_count + urb->curbe_entry_count + + urb->urb_entry_count * urb->urb_entry_size <= + info->cv_urb_alloc_size / 32); + + return true; +} + +static int +compute_interface_get_gen6_read_end(const struct ilo_dev *dev, + const struct ilo_state_compute_interface_info *interface) +{ + const int per_thread_read = (interface->curbe_read_length + 31) / 32; + const int cross_thread_read = + (interface->cross_thread_curbe_read_length + 31) / 32; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(interface->curbe_read_offset % 32 == 0); + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 60: + * + * "(Constant URB Entry Read Length) [0,63]" + */ + assert(per_thread_read <= 63); + + /* From the Haswell PRM, volume 2d, page 199: + * + * "(Cross-Thread Constant Data Read Length) [0,127]" + */ + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + assert(cross_thread_read <= 127); + else + assert(!cross_thread_read); + + if (per_thread_read || cross_thread_read) { + return interface->curbe_read_offset / 32 + cross_thread_read + + per_thread_read * interface->thread_group_size; + } else { + return 0; + } +} + +static bool +compute_validate_gen6(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info, + const struct compute_urb_configuration *urb) +{ + int min_curbe_entry_count; + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(info->interface_count <= urb->idrt_entry_count); + + min_curbe_entry_count = 0; + for (i = 0; i < info->interface_count; i++) { + const int read_end = + compute_interface_get_gen6_read_end(dev, &info->interfaces[i]); + + if (min_curbe_entry_count < read_end) + min_curbe_entry_count = read_end; + } + + assert(min_curbe_entry_count <= urb->curbe_entry_count); + + /* + * From the Broadwell PRM, volume 2b, page 452: + * + * "CURBE Allocation Size should be 0 for GPGPU workloads that uses + * indirect instead of CURBE." 
+ */ + if (!min_curbe_entry_count) + assert(!urb->curbe_entry_count); + + return true; +} + +static uint8_t +compute_get_gen6_scratch_space(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info) +{ + uint32_t scratch_size = 0; + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + for (i = 0; i < info->interface_count; i++) { + if (scratch_size < info->interfaces[i].scratch_size) + scratch_size = info->interfaces[i].scratch_size; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + assert(scratch_size <= 2 * 1024 * 1024); + + /* next power of two, starting from 1KB */ + return (scratch_size > 1024) ? + (util_last_bit(scratch_size - 1) - 10): 0; + } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + assert(scratch_size <= 2 * 1024 * 1024); + + /* next power of two, starting from 2KB */ + return (scratch_size > 2048) ? + (util_last_bit(scratch_size - 1) - 11): 0; + } else { + assert(scratch_size <= 12 * 1024); + + return (scratch_size > 1024) ? + (scratch_size - 1) / 1024 : 0; + } +} + +static bool +compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute, + const struct ilo_dev *dev, + const struct ilo_state_compute_info *info) +{ + struct compute_urb_configuration urb; + uint8_t scratch_space; + + uint32_t dw1, dw2, dw4; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!compute_get_gen6_urb_configuration(dev, info, &urb) || + !compute_validate_gen6(dev, info, &urb)) + return false; + + scratch_space = compute_get_gen6_scratch_space(dev, info); + + dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT; + dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT | + urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT | + GEN6_VFE_DW2_RESET_GATEWAY_TIMER | + GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL; + + if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5)) + dw2 |= GEN7_VFE_DW2_GPGPU_MODE; + + assert(urb.urb_entry_size); + + dw4 = (urb.urb_entry_size - 1) << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT | + urb.curbe_entry_count << 
GEN6_VFE_DW4_CURBE_SIZE__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(compute->vfe) >= 3); + compute->vfe[0] = dw1; + compute->vfe[1] = dw2; + compute->vfe[2] = dw4; + + return true; +} + +static uint8_t +compute_interface_get_gen6_sampler_count(const struct ilo_dev *dev, + const struct ilo_state_compute_interface_info *interface) +{ + ILO_DEV_ASSERT(dev, 6, 8); + return (interface->sampler_count <= 12) ? + (interface->sampler_count + 3) / 4 : 4; +} + +static uint8_t +compute_interface_get_gen6_surface_count(const struct ilo_dev *dev, + const struct ilo_state_compute_interface_info *interface) +{ + ILO_DEV_ASSERT(dev, 6, 8); + return (interface->surface_count <= 31) ? interface->surface_count : 31; +} + +static uint8_t +compute_interface_get_gen7_slm_size(const struct ilo_dev *dev, + const struct ilo_state_compute_interface_info *interface) +{ + ILO_DEV_ASSERT(dev, 7, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 61: + * + * "The amount is specified in 4k blocks, but only powers of 2 are + * allowed: 0, 4k, 8k, 16k, 32k and 64k per half-slice." 
+ */ + assert(interface->slm_size <= 64 * 1024); + + return util_next_power_of_two((interface->slm_size + 4095) / 4096); +} + +static bool +compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_state_compute *compute, + const struct ilo_dev *dev, + const struct ilo_state_compute_info *info) +{ + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + for (i = 0; i < info->interface_count; i++) { + const struct ilo_state_compute_interface_info *interface = + &info->interfaces[i]; + uint16_t read_offset, per_thread_read_len, cross_thread_read_len; + uint8_t sampler_count, surface_count; + uint32_t dw0, dw2, dw3, dw4, dw5, dw6; + + assert(interface->kernel_offset % 64 == 0); + assert(interface->thread_group_size); + + read_offset = interface->curbe_read_offset / 32; + per_thread_read_len = (interface->curbe_read_length + 31) / 32; + cross_thread_read_len = + (interface->cross_thread_curbe_read_length + 31) / 32; + + sampler_count = + compute_interface_get_gen6_sampler_count(dev, interface); + surface_count = + compute_interface_get_gen6_surface_count(dev, interface); + + dw0 = interface->kernel_offset; + dw2 = sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT; + dw3 = surface_count << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT; + dw4 = per_thread_read_len << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT | + read_offset << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT; + + dw5 = 0; + dw6 = 0; + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + const uint8_t slm_size = + compute_interface_get_gen7_slm_size(dev, interface); + + dw5 |= GEN7_IDRT_DW5_ROUNDING_MODE_RTNE; + + if (slm_size) { + dw5 |= GEN7_IDRT_DW5_BARRIER_ENABLE | + slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT; + } + + /* + * From the Haswell PRM, volume 2d, page 199: + * + * "(Number of Threads in GPGPU Thread Group) Specifies the + * number of threads that are in this thread group. Used to + * program the barrier for the number of messages to expect. 
The + * minimum value is 0 (which will disable the barrier), while + * the maximum value is the number of threads in a subslice for + * local barriers." + * + * From the Broadwell PRM, volume 2d, page 183: + * + * "(Number of Threads in GPGPU Thread Group) Specifies the + * number of threads that are in this thread group. The minimum + * value is 1, while the maximum value is the number of threads + * in a subslice for local barriers. See vol1b Configurations + * for the number of threads per subslice for different + * products. The maximum value for global barriers is limited + * by the number of threads in the system, or by 511, whichever + * is lower. This field should not be set to 0 even if the + * barrier is disabled, since an accurate value is needed for + * proper pre-emption." + */ + if (slm_size || ilo_dev_gen(dev) >= ILO_GEN(8)) { + dw5 |= interface->thread_group_size << + GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + dw6 |= cross_thread_read_len << + GEN75_IDRT_DW6_CROSS_THREAD_CURBE_READ_LEN__SHIFT; + } + } + + STATIC_ASSERT(ARRAY_SIZE(compute->idrt[i]) >= 6); + compute->idrt[i][0] = dw0; + compute->idrt[i][1] = dw2; + compute->idrt[i][2] = dw3; + compute->idrt[i][3] = dw4; + compute->idrt[i][4] = dw5; + compute->idrt[i][5] = dw6; + } + + return true; +} + +bool +ilo_state_compute_init(struct ilo_state_compute *compute, + const struct ilo_dev *dev, + const struct ilo_state_compute_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(compute, sizeof(*compute))); + assert(ilo_is_zeroed(info->data, info->data_size)); + + assert(ilo_state_compute_data_size(dev, info->interface_count) <= + info->data_size); + compute->idrt = (uint32_t (*)[6]) info->data; + + ret &= compute_set_gen6_MEDIA_VFE_STATE(compute, dev, info); + ret &= compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(compute, dev, info); + + assert(ret); + + return ret; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h 
b/src/gallium/drivers/ilo/core/ilo_state_compute.h new file mode 100644 index 00000000000..346f7b617f4 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h @@ -0,0 +1,92 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_COMPUTE_H +#define ILO_STATE_COMPUTE_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +/* + * From the Haswell PRM, volume 7, page 836: + * + * "The first 64 URB entries are reserved for the interface + * description..." 
+ */ +#define ILO_STATE_COMPUTE_MAX_INTERFACE_COUNT 64 + +struct ilo_state_compute_interface_info { + /* usually 0 unless there are multiple interfaces */ + uint32_t kernel_offset; + + uint32_t scratch_size; + + uint8_t sampler_count; + uint8_t surface_count; + + uint16_t thread_group_size; + uint32_t slm_size; + + uint16_t curbe_read_offset; + uint16_t curbe_read_length; + uint16_t cross_thread_curbe_read_length; +}; + +struct ilo_state_compute_info { + void *data; + size_t data_size; + + const struct ilo_state_compute_interface_info *interfaces; + uint8_t interface_count; + + uint32_t cv_urb_alloc_size; + uint32_t curbe_alloc_size; +}; + +struct ilo_state_compute { + uint32_t vfe[3]; + + uint32_t (*idrt)[6]; + uint8_t idrt_count; +}; + +static inline size_t +ilo_state_compute_data_size(const struct ilo_dev *dev, + uint8_t interface_count) +{ + const struct ilo_state_compute *compute = NULL; + return sizeof(compute->idrt[0]) * interface_count; +} + +bool +ilo_state_compute_init(struct ilo_state_compute *compute, + const struct ilo_dev *dev, + const struct ilo_state_compute_info *info); + +#endif /* ILO_STATE_COMPUTE_H */ diff --git a/src/gallium/drivers/ilo/ilo_render_dynamic.c b/src/gallium/drivers/ilo/ilo_render_dynamic.c index 5618920a507..3b4c80227a6 100644 --- a/src/gallium/drivers/ilo/ilo_render_dynamic.c +++ b/src/gallium/drivers/ilo/ilo_render_dynamic.c @@ -30,6 +30,7 @@ #include "ilo_common.h" #include "ilo_blitter.h" +#include "ilo_shader.h" #include "ilo_state.h" #include "ilo_render_gen.h" @@ -522,20 +523,39 @@ gen6_emit_launch_grid_dynamic_idrt(struct ilo_render *r, struct ilo_render_launch_grid_session *session) { const struct ilo_shader_state *cs = vec->cs; - struct gen6_idrt_data data; + struct ilo_state_compute_interface_info interface; + struct ilo_state_compute_info info; + uint32_t kernel_offset; ILO_DEV_ASSERT(r->dev, 7, 7.5); - memset(&data, 0, sizeof(data)); + memset(&interface, 0, sizeof(interface)); - data.cs = cs; - data.sampler_offset = 
r->state.cs.SAMPLER_STATE; - data.binding_table_offset = r->state.cs.BINDING_TABLE_STATE; + interface.sampler_count = + ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT); + interface.surface_count = + ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT); + interface.thread_group_size = session->thread_group_size; + interface.slm_size = + ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE); + interface.curbe_read_length = r->state.cs.PUSH_CONSTANT_BUFFER_size; - data.curbe_size = r->state.cs.PUSH_CONSTANT_BUFFER_size; - data.thread_group_size = session->thread_group_size; + memset(&info, 0, sizeof(info)); + info.data = session->compute_data; + info.data_size = sizeof(session->compute_data); + info.interfaces = &interface; + info.interface_count = 1; + info.cv_urb_alloc_size = r->dev->urb_size; + info.curbe_alloc_size = r->state.cs.PUSH_CONSTANT_BUFFER_size; + + ilo_state_compute_init(&session->compute, r->dev, &info); + + kernel_offset = ilo_shader_get_kernel_offset(cs); + + session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder, + &session->compute, &kernel_offset, + &r->state.cs.SAMPLER_STATE, &r->state.cs.BINDING_TABLE_STATE); - session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder, &data, 1); session->idrt_size = 32; } diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index 00c8113a45d..aae4ef2f373 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -185,6 +185,9 @@ struct ilo_render_launch_grid_session { uint32_t idrt; int idrt_size; + + uint32_t compute_data[6]; + struct ilo_state_compute compute; }; int diff --git a/src/gallium/drivers/ilo/ilo_render_media.c b/src/gallium/drivers/ilo/ilo_render_media.c index 387920a912c..a0de0024d61 100644 --- a/src/gallium/drivers/ilo/ilo_render_media.c +++ b/src/gallium/drivers/ilo/ilo_render_media.c @@ -30,6 +30,7 @@ #include "core/ilo_builder_mi.h" #include "core/ilo_builder_render.h" 
+#include "ilo_shader.h" #include "ilo_state.h" #include "ilo_render_gen.h" @@ -206,7 +207,7 @@ ilo_render_emit_launch_grid_commands(struct ilo_render *render, gen6_state_base_address(render->builder, true); - gen6_MEDIA_VFE_STATE(render->builder, pcb_size, use_slm); + gen6_MEDIA_VFE_STATE(render->builder, &session->compute); if (pcb_size) gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size); diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index e4746d0969b..537e5db120b 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -29,6 +29,7 @@ #define ILO_STATE_H #include "core/ilo_state_cc.h" +#include "core/ilo_state_compute.h" #include "core/ilo_state_raster.h" #include "core/ilo_state_sampler.h" #include "core/ilo_state_sbe.h" From f22406837ff5dc881d8496d05ab001204b14eaf5 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 17 Oct 2014 14:58:11 +0900 Subject: [PATCH 773/834] nouveau: support for custom VRAM domains Some GPUs (e.g. GK20A, GM20B) do not embed VRAM of their own and use the system memory as a backend instead. For such systems, allocating objects in VRAM results in errors since the kernel will not allow VRAM objects allocations. This patch adds a vram_domain member to struct nouveau_screen that can optionally be initialized to an alternative domain to use for VRAM allocations. If left untouched, NOUVEAU_BO_VRAM will be used for systems that embed VRAM, and NOUVEAU_BO_GART will be used for VRAM-less systems. Code that uses GPU objects is then expected to use the NV_VRAM_DOMAIN() macro in place of NOUVEAU_BO_VRAM to ensure correct behavior on VRAM-less chips. 
Signed-off-by: Alexandre Courbot Reviewed-by: Ilia Mirkin Reviewed-by: Martin Peres --- src/gallium/drivers/nouveau/nouveau_screen.c | 10 ++++++++++ src/gallium/drivers/nouveau/nouveau_screen.h | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index b4f1413fd8b..c6e5074db19 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -164,6 +164,16 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) size = sizeof(nvc0_data); } + /* + * Set default VRAM domain if not overridden + */ + if (!screen->vram_domain) { + if (dev->vram_size > 0) + screen->vram_domain = NOUVEAU_BO_VRAM; + else + screen->vram_domain = NOUVEAU_BO_GART; + } + ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS, data, size, &screen->channel); if (ret) diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index cf06f7e88aa..30041b271c9 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -51,6 +51,8 @@ struct nouveau_screen { boolean hint_buf_keep_sysmem_copy; + unsigned vram_domain; + struct { unsigned profiles_checked; unsigned profiles_present; @@ -94,6 +96,8 @@ struct nouveau_screen { #endif }; +#define NV_VRAM_DOMAIN(screen) ((screen)->vram_domain) + #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS # define NOUVEAU_DRV_STAT(s, n, v) do { \ (s)->stats.named.n += (v); \ From da8300cb03e8cf1f37b5573a2db026fd28e0a3c5 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 17 Oct 2014 15:05:32 +0900 Subject: [PATCH 774/834] nvc0: use NV_VRAM_DOMAIN() macro Use the newly-introduced NV_VRAM_DOMAIN() macro to support alternative VRAM domains for chips that do not have dedicated video memory. 
Signed-off-by: Alexandre Courbot Reviewed-by: Ilia Mirkin Reviewed-by: Martin Peres --- src/gallium/drivers/nouveau/nouveau_buffer.c | 6 +++--- src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 4 ++-- src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 8 ++++---- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 17 +++++++++++------ .../drivers/nouveau/nvc0/nvc0_shader_state.c | 2 +- .../drivers/nouveau/nvc0/nvc0_state_validate.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 2 +- src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 2 +- 11 files changed, 27 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index 32fa65c8a51..09cdbb53ecb 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -658,13 +658,13 @@ nouveau_buffer_create(struct pipe_screen *pscreen, switch (buffer->base.usage) { case PIPE_USAGE_DEFAULT: case PIPE_USAGE_IMMUTABLE: - buffer->domain = NOUVEAU_BO_VRAM; + buffer->domain = NV_VRAM_DOMAIN(screen); break; case PIPE_USAGE_DYNAMIC: /* For most apps, we'd have to do staging transfers to avoid sync * with this usage, and GART -> GART copies would be suboptimal. 
*/ - buffer->domain = NOUVEAU_BO_VRAM; + buffer->domain = NV_VRAM_DOMAIN(screen); break; case PIPE_USAGE_STAGING: case PIPE_USAGE_STREAM: @@ -676,7 +676,7 @@ nouveau_buffer_create(struct pipe_screen *pscreen, } } else { if (buffer->base.bind & screen->vidmem_bindings) - buffer->domain = NOUVEAU_BO_VRAM; + buffer->domain = NV_VRAM_DOMAIN(screen); else if (buffer->base.bind & screen->sysmem_bindings) buffer->domain = NOUVEAU_BO_GART; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c index 10cebb17eee..f15d8f3ecb6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c @@ -377,7 +377,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, if (!bo_config.nv50.memtype && (pt->bind & PIPE_BIND_SHARED)) mt->base.domain = NOUVEAU_BO_GART; else - mt->base.domain = NOUVEAU_BO_VRAM; + mt->base.domain = NV_VRAM_DOMAIN(nouveau_screen(pscreen)); bo_flags = mt->base.domain | NOUVEAU_BO_NOSNOOP; if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET)) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index ad287a2af6b..56fc83d3679 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -57,7 +57,7 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen, return ret; } - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL, + ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL, &screen->parm); if (ret) return ret; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 7904984f503..a35c3f66142 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -329,7 +329,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) /* add permanently resident buffers to bufctxts */ - flags = 
NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD; BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text); BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo); @@ -340,7 +340,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm); } - flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR; + flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RDWR; if (screen->poly_cache) BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c index fc75fc6a4a1..3875bbf4ca4 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -302,7 +302,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, if (!bo_config.nvc0.memtype && (pt->usage == PIPE_USAGE_STAGING || pt->bind & PIPE_BIND_SHARED)) mt->base.domain = NOUVEAU_BO_GART; else - mt->base.domain = NOUVEAU_BO_VRAM; + mt->base.domain = NV_VRAM_DOMAIN(nouveau_screen(pscreen)); bo_flags = mt->base.domain | NOUVEAU_BO_NOSNOOP; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 4a47cb2d164..e1f5a8c4416 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -735,12 +735,12 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) if (!is_cp) nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, - NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); + NV_VRAM_DOMAIN(&screen->base), NVC0_SHADER_HEADER_SIZE, prog->hdr); nvc0->base.push_data(&nvc0->base, screen->text, code_pos, - NOUVEAU_BO_VRAM, prog->code_size, prog->code); + NV_VRAM_DOMAIN(&screen->base), prog->code_size, prog->code); if (prog->immd_size) nvc0->base.push_data(&nvc0->base, - screen->text, prog->immd_base, NOUVEAU_BO_VRAM, + 
screen->text, prog->immd_base, NV_VRAM_DOMAIN(&screen->base), prog->immd_size, prog->immd_data); BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); @@ -771,7 +771,7 @@ nvc0_program_library_upload(struct nvc0_context *nvc0) return; nvc0->base.push_data(&nvc0->base, - screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, + screen->text, screen->lib_code->start, NV_VRAM_DOMAIN(&screen->base), size, code); /* no need for a memory barrier, will be emitted with first program */ } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 5936d05a5b9..56c230e42fc 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -583,7 +583,7 @@ nvc0_screen_resize_tls_area(struct nvc0_screen *screen, size = align(size, 1 << 17); - ret = nouveau_bo_new(screen->base.device, NOUVEAU_BO_VRAM, 1 << 17, size, + ret = nouveau_bo_new(screen->base.device, NV_VRAM_DOMAIN(&screen->base), 1 << 17, size, NULL, &bo); if (ret) { NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size); @@ -646,6 +646,11 @@ nvc0_screen_create(struct nouveau_device *dev) screen->base.sysmem_bindings |= PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; + if (screen->base.vram_domain & NOUVEAU_BO_GART) { + screen->base.sysmem_bindings |= screen->base.vidmem_bindings; + screen->base.vidmem_bindings = 0; + } + pscreen->destroy = nvc0_screen_destroy; pscreen->context_create = nvc0_create; pscreen->is_format_supported = nvc0_screen_is_format_supported; @@ -824,7 +829,7 @@ nvc0_screen_create(struct nouveau_device *dev) nvc0_magic_3d_init(push, screen->eng3d->oclass); - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, + ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL, &screen->text); if (ret) goto fail; @@ -834,12 +839,12 @@ nvc0_screen_create(struct nouveau_device *dev) */ nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100); - ret 
= nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL, + ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 6 << 16, NULL, &screen->uniform_bo); if (ret) goto fail; - PUSH_REFN (push, screen->uniform_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + PUSH_REFN (push, screen->uniform_bo, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_WR); for (i = 0; i < 5; ++i) { /* TIC and TSC entries for each unit (nve4+ only) */ @@ -910,7 +915,7 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, 0); if (screen->eng3d->oclass < GM107_3D_CLASS) { - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, + ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL, &screen->poly_cache); if (ret) goto fail; @@ -921,7 +926,7 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, 3); } - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL, + ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 17, NULL, &screen->txc); if (ret) goto fail; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 516b33b76d5..c9b5a5cbfc1 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -34,7 +34,7 @@ nvc0_program_update_context_state(struct nvc0_context *nvc0, struct nouveau_pushbuf *push = nvc0->base.pushbuf; if (prog && prog->need_tls) { - const uint32_t flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR; + const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR; if (!nvc0->state.tls_required) BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls); nvc0->state.tls_required |= 1 << stage; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index d3ad81d2d66..c52399ab312 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -439,7 +439,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); PUSH_DATA (push, (0 << 4) | 1); } - nvc0_cb_push(&nvc0->base, bo, NOUVEAU_BO_VRAM, + nvc0_cb_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), base, nvc0->state.uniform_buffer_bound[s], 0, (size + 3) / 4, nvc0->constbuf[s][0].u.data); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index 457f27c8311..ddc0409ca86 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -396,7 +396,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s) tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc, - 65536 + tsc->id * 32, NOUVEAU_BO_VRAM, + 65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base), 32, tsc->tsc); need_flush = TRUE; } diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index f243316b899..fce02a7cc57 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -63,7 +63,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, return ret; } - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, NVE4_CP_PARAM_SIZE, NULL, + ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, NVE4_CP_PARAM_SIZE, NULL, &screen->parm); if (ret) return ret; From aa3e5e0dded4d732ea46083201940bd23214785c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 22 Jun 2015 13:15:24 +0800 Subject: [PATCH 775/834] ilo: add ilo_state_vf_valid_element_format() Check if a surface format can be used as a VE format. 
--- src/gallium/drivers/ilo/core/ilo_format.c | 126 +------------------- src/gallium/drivers/ilo/core/ilo_state_vf.c | 126 ++++++++++++++++++++ src/gallium/drivers/ilo/core/ilo_state_vf.h | 4 + 3 files changed, 132 insertions(+), 124 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_format.c b/src/gallium/drivers/ilo/core/ilo_format.c index 280e499d54a..c1b001b5664 100644 --- a/src/gallium/drivers/ilo/core/ilo_format.c +++ b/src/gallium/drivers/ilo/core/ilo_format.c @@ -26,12 +26,9 @@ */ #include "genhw/genhw.h" +#include "ilo_state_vf.h" #include "ilo_format.h" -struct ilo_vf_cap { - int vertex_element; -}; - struct ilo_sol_cap { int buffer; }; @@ -50,122 +47,6 @@ struct ilo_dp_cap { int media_color_processing; }; -/* - * This table is based on: - * - * - the Sandy Bridge PRM, volume 4 part 1, page 88-97 - * - the Ivy Bridge PRM, volume 2 part 1, page 97-99 - * - the Haswell PRM, volume 7, page 467-470 - */ -static const struct ilo_vf_cap ilo_vf_caps[] = { -#define CAP(vertex_element) { ILO_GEN(vertex_element) } - [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1), - [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1), - [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1), - [GEN6_FORMAT_R32G32B32A32_UNORM] = CAP( 1), - [GEN6_FORMAT_R32G32B32A32_SNORM] = CAP( 1), - [GEN6_FORMAT_R64G64_FLOAT] = CAP( 1), - [GEN6_FORMAT_R32G32B32A32_SSCALED] = CAP( 1), - [GEN6_FORMAT_R32G32B32A32_USCALED] = CAP( 1), - [GEN6_FORMAT_R32G32B32A32_SFIXED] = CAP(7.5), - [GEN6_FORMAT_R32G32B32_FLOAT] = CAP( 1), - [GEN6_FORMAT_R32G32B32_SINT] = CAP( 1), - [GEN6_FORMAT_R32G32B32_UINT] = CAP( 1), - [GEN6_FORMAT_R32G32B32_UNORM] = CAP( 1), - [GEN6_FORMAT_R32G32B32_SNORM] = CAP( 1), - [GEN6_FORMAT_R32G32B32_SSCALED] = CAP( 1), - [GEN6_FORMAT_R32G32B32_USCALED] = CAP( 1), - [GEN6_FORMAT_R32G32B32_SFIXED] = CAP(7.5), - [GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1), - [GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1), - [GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1), - [GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1), - 
[GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1), - [GEN6_FORMAT_R32G32_FLOAT] = CAP( 1), - [GEN6_FORMAT_R32G32_SINT] = CAP( 1), - [GEN6_FORMAT_R32G32_UINT] = CAP( 1), - [GEN6_FORMAT_R32G32_UNORM] = CAP( 1), - [GEN6_FORMAT_R32G32_SNORM] = CAP( 1), - [GEN6_FORMAT_R64_FLOAT] = CAP( 1), - [GEN6_FORMAT_R16G16B16A16_SSCALED] = CAP( 1), - [GEN6_FORMAT_R16G16B16A16_USCALED] = CAP( 1), - [GEN6_FORMAT_R32G32_SSCALED] = CAP( 1), - [GEN6_FORMAT_R32G32_USCALED] = CAP( 1), - [GEN6_FORMAT_R32G32_SFIXED] = CAP(7.5), - [GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1), - [GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1), - [GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1), - [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP( 1), - [GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1), - [GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1), - [GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1), - [GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1), - [GEN6_FORMAT_R16G16_UNORM] = CAP( 1), - [GEN6_FORMAT_R16G16_SNORM] = CAP( 1), - [GEN6_FORMAT_R16G16_SINT] = CAP( 1), - [GEN6_FORMAT_R16G16_UINT] = CAP( 1), - [GEN6_FORMAT_R16G16_FLOAT] = CAP( 1), - [GEN6_FORMAT_B10G10R10A2_UNORM] = CAP(7.5), - [GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1), - [GEN6_FORMAT_R32_SINT] = CAP( 1), - [GEN6_FORMAT_R32_UINT] = CAP( 1), - [GEN6_FORMAT_R32_FLOAT] = CAP( 1), - [GEN6_FORMAT_R32_UNORM] = CAP( 1), - [GEN6_FORMAT_R32_SNORM] = CAP( 1), - [GEN6_FORMAT_R10G10B10X2_USCALED] = CAP( 1), - [GEN6_FORMAT_R8G8B8A8_SSCALED] = CAP( 1), - [GEN6_FORMAT_R8G8B8A8_USCALED] = CAP( 1), - [GEN6_FORMAT_R16G16_SSCALED] = CAP( 1), - [GEN6_FORMAT_R16G16_USCALED] = CAP( 1), - [GEN6_FORMAT_R32_SSCALED] = CAP( 1), - [GEN6_FORMAT_R32_USCALED] = CAP( 1), - [GEN6_FORMAT_R8G8_UNORM] = CAP( 1), - [GEN6_FORMAT_R8G8_SNORM] = CAP( 1), - [GEN6_FORMAT_R8G8_SINT] = CAP( 1), - [GEN6_FORMAT_R8G8_UINT] = CAP( 1), - [GEN6_FORMAT_R16_UNORM] = CAP( 1), - [GEN6_FORMAT_R16_SNORM] = CAP( 1), - [GEN6_FORMAT_R16_SINT] = CAP( 1), - [GEN6_FORMAT_R16_UINT] = CAP( 1), - [GEN6_FORMAT_R16_FLOAT] = CAP( 1), - [GEN6_FORMAT_R8G8_SSCALED] = CAP( 1), - 
[GEN6_FORMAT_R8G8_USCALED] = CAP( 1), - [GEN6_FORMAT_R16_SSCALED] = CAP( 1), - [GEN6_FORMAT_R16_USCALED] = CAP( 1), - [GEN6_FORMAT_R8_UNORM] = CAP( 1), - [GEN6_FORMAT_R8_SNORM] = CAP( 1), - [GEN6_FORMAT_R8_SINT] = CAP( 1), - [GEN6_FORMAT_R8_UINT] = CAP( 1), - [GEN6_FORMAT_R8_SSCALED] = CAP( 1), - [GEN6_FORMAT_R8_USCALED] = CAP( 1), - [GEN6_FORMAT_R8G8B8_UNORM] = CAP( 1), - [GEN6_FORMAT_R8G8B8_SNORM] = CAP( 1), - [GEN6_FORMAT_R8G8B8_SSCALED] = CAP( 1), - [GEN6_FORMAT_R8G8B8_USCALED] = CAP( 1), - [GEN6_FORMAT_R64G64B64A64_FLOAT] = CAP( 1), - [GEN6_FORMAT_R64G64B64_FLOAT] = CAP( 1), - [GEN6_FORMAT_R16G16B16_FLOAT] = CAP( 6), - [GEN6_FORMAT_R16G16B16_UNORM] = CAP( 1), - [GEN6_FORMAT_R16G16B16_SNORM] = CAP( 1), - [GEN6_FORMAT_R16G16B16_SSCALED] = CAP( 1), - [GEN6_FORMAT_R16G16B16_USCALED] = CAP( 1), - [GEN6_FORMAT_R16G16B16_UINT] = CAP(7.5), - [GEN6_FORMAT_R16G16B16_SINT] = CAP(7.5), - [GEN6_FORMAT_R32_SFIXED] = CAP(7.5), - [GEN6_FORMAT_R10G10B10A2_SNORM] = CAP(7.5), - [GEN6_FORMAT_R10G10B10A2_USCALED] = CAP(7.5), - [GEN6_FORMAT_R10G10B10A2_SSCALED] = CAP(7.5), - [GEN6_FORMAT_R10G10B10A2_SINT] = CAP(7.5), - [GEN6_FORMAT_B10G10R10A2_SNORM] = CAP(7.5), - [GEN6_FORMAT_B10G10R10A2_USCALED] = CAP(7.5), - [GEN6_FORMAT_B10G10R10A2_SSCALED] = CAP(7.5), - [GEN6_FORMAT_B10G10R10A2_UINT] = CAP(7.5), - [GEN6_FORMAT_B10G10R10A2_SINT] = CAP(7.5), - [GEN6_FORMAT_R8G8B8_UINT] = CAP(7.5), - [GEN6_FORMAT_R8G8B8_SINT] = CAP(7.5), -#undef CAP -}; - /* * This table is based on: * @@ -408,11 +289,8 @@ ilo_format_support_vb(const struct ilo_dev *dev, enum pipe_format format) { const int idx = ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER); - const struct ilo_vf_cap *cap = (idx >= 0 && idx < Elements(ilo_vf_caps)) ? 
- &ilo_vf_caps[idx] : NULL; - return (cap && cap->vertex_element && - ilo_dev_gen(dev) >= cap->vertex_element); + return (idx >= 0 && ilo_state_vf_valid_element_format(dev, idx)); } bool diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c index 09e0f7f2293..ddc75428ed7 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.c +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c @@ -67,6 +67,7 @@ vf_validate_gen6_elements(const struct ilo_dev *dev, assert(elem->buffer < ILO_STATE_VF_MAX_BUFFER_COUNT); assert(elem->vertex_offset < max_vertex_offset); + assert(ilo_state_vf_valid_element_format(dev, elem->format)); } return true; @@ -642,6 +643,131 @@ index_buffer_set_gen8_3DSTATE_INDEX_BUFFER(struct ilo_state_index_buffer *ib, return true; } +bool +ilo_state_vf_valid_element_format(const struct ilo_dev *dev, + enum gen_surface_format format) +{ + /* + * This table is based on: + * + * - the Sandy Bridge PRM, volume 4 part 1, page 88-97 + * - the Ivy Bridge PRM, volume 2 part 1, page 97-99 + * - the Haswell PRM, volume 7, page 467-470 + */ + static const int vf_element_formats[] = { + [GEN6_FORMAT_R32G32B32A32_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32A32_SINT] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32A32_UINT] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32A32_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32A32_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R64G64_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32A32_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32A32_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32A32_SFIXED] = ILO_GEN(7.5), + [GEN6_FORMAT_R32G32B32_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32_SINT] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32_UINT] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32B32_SFIXED] = ILO_GEN(7.5), + 
[GEN6_FORMAT_R16G16B16A16_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16B16A16_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16B16A16_SINT] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16B16A16_UINT] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16B16A16_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32_SINT] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32_UINT] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R64_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16B16A16_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16B16A16_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R32G32_SFIXED] = ILO_GEN(7.5), + [GEN6_FORMAT_B8G8R8A8_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R10G10B10A2_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R10G10B10A2_UINT] = ILO_GEN( 1), + [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8B8A8_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8B8A8_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8B8A8_SINT] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8B8A8_UINT] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16_SINT] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16_UINT] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_B10G10R10A2_UNORM] = ILO_GEN(7.5), + [GEN6_FORMAT_R11G11B10_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_R32_SINT] = ILO_GEN( 1), + [GEN6_FORMAT_R32_UINT] = ILO_GEN( 1), + [GEN6_FORMAT_R32_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_R32_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R32_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R10G10B10X2_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8B8A8_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8B8A8_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R32_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R32_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8_UNORM] = 
ILO_GEN( 1), + [GEN6_FORMAT_R8G8_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8_SINT] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8_UINT] = ILO_GEN( 1), + [GEN6_FORMAT_R16_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R16_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R16_SINT] = ILO_GEN( 1), + [GEN6_FORMAT_R16_UINT] = ILO_GEN( 1), + [GEN6_FORMAT_R16_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R16_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R16_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R8_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R8_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R8_SINT] = ILO_GEN( 1), + [GEN6_FORMAT_R8_UINT] = ILO_GEN( 1), + [GEN6_FORMAT_R8_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R8_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8B8_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8B8_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8B8_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R8G8B8_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R64G64B64A64_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_R64G64B64_FLOAT] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16B16_FLOAT] = ILO_GEN( 6), + [GEN6_FORMAT_R16G16B16_UNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16B16_SNORM] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16B16_SSCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16B16_USCALED] = ILO_GEN( 1), + [GEN6_FORMAT_R16G16B16_UINT] = ILO_GEN(7.5), + [GEN6_FORMAT_R16G16B16_SINT] = ILO_GEN(7.5), + [GEN6_FORMAT_R32_SFIXED] = ILO_GEN(7.5), + [GEN6_FORMAT_R10G10B10A2_SNORM] = ILO_GEN(7.5), + [GEN6_FORMAT_R10G10B10A2_USCALED] = ILO_GEN(7.5), + [GEN6_FORMAT_R10G10B10A2_SSCALED] = ILO_GEN(7.5), + [GEN6_FORMAT_R10G10B10A2_SINT] = ILO_GEN(7.5), + [GEN6_FORMAT_B10G10R10A2_SNORM] = ILO_GEN(7.5), + [GEN6_FORMAT_B10G10R10A2_USCALED] = ILO_GEN(7.5), + [GEN6_FORMAT_B10G10R10A2_SSCALED] = ILO_GEN(7.5), + [GEN6_FORMAT_B10G10R10A2_UINT] = ILO_GEN(7.5), + [GEN6_FORMAT_B10G10R10A2_SINT] = ILO_GEN(7.5), + [GEN6_FORMAT_R8G8B8_UINT] = ILO_GEN(7.5), + [GEN6_FORMAT_R8G8B8_SINT] = ILO_GEN(7.5), + }; + + ILO_DEV_ASSERT(dev, 6, 8); + + return 
(format < ARRAY_SIZE(vf_element_formats) && + vf_element_formats[format] && + ilo_dev_gen(dev) >= vf_element_formats[format]); +} + bool ilo_state_vf_init(struct ilo_state_vf *vf, const struct ilo_dev *dev, diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h index 39750d8aafe..f15c63a248a 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.h +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h @@ -174,6 +174,10 @@ ilo_state_vf_data_size(const struct ilo_dev *dev, uint8_t element_count) sizeof(vf->user_instancing[0])) * element_count; } +bool +ilo_state_vf_valid_element_format(const struct ilo_dev *dev, + enum gen_surface_format format); + bool ilo_state_vf_init(struct ilo_state_vf *vf, const struct ilo_dev *dev, From 3547bb078307995e92d509037bc86af7fd60c8c3 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 22 Jun 2015 13:37:05 +0800 Subject: [PATCH 776/834] ilo: add ilo_state_surface_valid_format() Check if a surface format can be used for the specified access type. 
--- src/gallium/drivers/ilo/Makefile.sources | 1 + src/gallium/drivers/ilo/core/ilo_format.c | 291 +-------------- .../drivers/ilo/core/ilo_state_surface.h | 5 + .../ilo/core/ilo_state_surface_format.c | 351 ++++++++++++++++++ 4 files changed, 364 insertions(+), 284 deletions(-) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_surface_format.c diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 95b6b7a7b16..ed024e3a866 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -35,6 +35,7 @@ C_SOURCES := \ core/ilo_state_sol.c \ core/ilo_state_sol.h \ core/ilo_state_surface.c \ + core/ilo_state_surface_format.c \ core/ilo_state_surface.h \ core/ilo_state_urb.c \ core/ilo_state_urb.h \ diff --git a/src/gallium/drivers/ilo/core/ilo_format.c b/src/gallium/drivers/ilo/core/ilo_format.c index c1b001b5664..b96f71c0021 100644 --- a/src/gallium/drivers/ilo/core/ilo_format.c +++ b/src/gallium/drivers/ilo/core/ilo_format.c @@ -26,264 +26,10 @@ */ #include "genhw/genhw.h" +#include "ilo_state_surface.h" #include "ilo_state_vf.h" #include "ilo_format.h" -struct ilo_sol_cap { - int buffer; -}; - -struct ilo_sampler_cap { - int sampling; - int filtering; - int shadow_map; - int chroma_key; -}; - -struct ilo_dp_cap { - int rt_write; - int rt_write_blending; - int typed_write; - int media_color_processing; -}; - -/* - * This table is based on: - * - * - the Sandy Bridge PRM, volume 4 part 1, page 88-97 - * - the Ivy Bridge PRM, volume 2 part 1, page 195 - * - the Haswell PRM, volume 7, page 535 - */ -static const struct ilo_sol_cap ilo_sol_caps[] = { -#define CAP(buffer) { ILO_GEN(buffer) } - [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1), - [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1), - [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1), - [GEN6_FORMAT_R32G32B32_FLOAT] = CAP( 1), - [GEN6_FORMAT_R32G32B32_SINT] = CAP( 1), - [GEN6_FORMAT_R32G32B32_UINT] = CAP( 1), - [GEN6_FORMAT_R32G32_FLOAT] 
= CAP( 1), - [GEN6_FORMAT_R32G32_SINT] = CAP( 1), - [GEN6_FORMAT_R32G32_UINT] = CAP( 1), - [GEN6_FORMAT_R32_SINT] = CAP( 1), - [GEN6_FORMAT_R32_UINT] = CAP( 1), - [GEN6_FORMAT_R32_FLOAT] = CAP( 1), -#undef CAP -}; - -/* - * This table is based on: - * - * - the Sandy Bridge PRM, volume 4 part 1, page 88-97 - * - the Ivy Bridge PRM, volume 4 part 1, page 84-87 - */ -static const struct ilo_sampler_cap ilo_sampler_caps[] = { -#define CAP(sampling, filtering, shadow_map, chroma_key) \ - { ILO_GEN(sampling), ILO_GEN(filtering), ILO_GEN(shadow_map), ILO_GEN(chroma_key) } - [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1, 5, 0, 0), - [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R32G32B32X32_FLOAT] = CAP( 1, 5, 0, 0), - [GEN6_FORMAT_R32G32B32_FLOAT] = CAP( 1, 5, 0, 0), - [GEN6_FORMAT_R32G32B32_SINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R32G32B32_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R32G32_FLOAT] = CAP( 1, 5, 0, 0), - [GEN6_FORMAT_R32G32_SINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R32G32_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS] = CAP( 1, 5, 1, 0), - [GEN6_FORMAT_X32_TYPELESS_G8X24_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_L32A32_FLOAT] = CAP( 1, 5, 0, 0), - [GEN6_FORMAT_R16G16B16X16_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R16G16B16X16_FLOAT] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_A32X32_FLOAT] = CAP( 1, 5, 0, 0), - [GEN6_FORMAT_L32X32_FLOAT] = CAP( 1, 5, 0, 0), - [GEN6_FORMAT_I32X32_FLOAT] = CAP( 1, 5, 0, 0), - [GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB] = CAP( 1, 
1, 0, 0), - [GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R16G16_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R16G16_SNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R16G16_SINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R16G16_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R16G16_FLOAT] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_B10G10R10A2_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R32_SINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R32_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R32_FLOAT] = CAP( 1, 5, 1, 0), - [GEN6_FORMAT_R24_UNORM_X8_TYPELESS] = CAP( 1, 5, 1, 0), - [GEN6_FORMAT_X24_TYPELESS_G8_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_L16A16_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_I24X8_UNORM] = CAP( 1, 5, 1, 0), - [GEN6_FORMAT_L24X8_UNORM] = CAP( 1, 5, 1, 0), - [GEN6_FORMAT_A24X8_UNORM] = CAP( 1, 5, 1, 0), - [GEN6_FORMAT_I32_FLOAT] = CAP( 1, 5, 1, 0), - [GEN6_FORMAT_L32_FLOAT] = CAP( 1, 5, 1, 0), - [GEN6_FORMAT_A32_FLOAT] = CAP( 1, 5, 1, 0), - [GEN6_FORMAT_B8G8R8X8_UNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_B8G8R8X8_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R8G8B8X8_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R8G8B8X8_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R9G9B9E5_SHAREDEXP] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_B10G10R10X2_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_L16A16_FLOAT] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_B5G6R5_UNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_B5G6R5_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_B5G5R5A1_UNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_B4G4R4A4_UNORM] = CAP( 1, 1, 
0, 1), - [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R8G8_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R8G8_SNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_R8G8_SINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R8G8_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R16_UNORM] = CAP( 1, 1, 1, 0), - [GEN6_FORMAT_R16_SNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R16_SINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R16_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R16_FLOAT] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_A8P8_UNORM_PALETTE0] = CAP( 5, 5, 0, 0), - [GEN6_FORMAT_A8P8_UNORM_PALETTE1] = CAP( 5, 5, 0, 0), - [GEN6_FORMAT_I16_UNORM] = CAP( 1, 1, 1, 0), - [GEN6_FORMAT_L16_UNORM] = CAP( 1, 1, 1, 0), - [GEN6_FORMAT_A16_UNORM] = CAP( 1, 1, 1, 0), - [GEN6_FORMAT_L8A8_UNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_I16_FLOAT] = CAP( 1, 1, 1, 0), - [GEN6_FORMAT_L16_FLOAT] = CAP( 1, 1, 1, 0), - [GEN6_FORMAT_A16_FLOAT] = CAP( 1, 1, 1, 0), - [GEN6_FORMAT_L8A8_UNORM_SRGB] = CAP(4.5, 4.5, 0, 0), - [GEN6_FORMAT_R5G5_SNORM_B6_UNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_P8A8_UNORM_PALETTE0] = CAP( 5, 5, 0, 0), - [GEN6_FORMAT_P8A8_UNORM_PALETTE1] = CAP( 5, 5, 0, 0), - [GEN6_FORMAT_R8_UNORM] = CAP( 1, 1, 0, 4.5), - [GEN6_FORMAT_R8_SNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R8_SINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_R8_UINT] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_A8_UNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_I8_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_L8_UNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_P4A4_UNORM_PALETTE0] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_A4P4_UNORM_PALETTE0] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_P8_UNORM_PALETTE0] = CAP(4.5, 4.5, 0, 0), - [GEN6_FORMAT_L8_UNORM_SRGB] = CAP(4.5, 4.5, 0, 0), - [GEN6_FORMAT_P8_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), - [GEN6_FORMAT_P4A4_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), - [GEN6_FORMAT_A4P4_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), - [GEN6_FORMAT_DXT1_RGB_SRGB] = CAP(4.5, 4.5, 0, 0), - [GEN6_FORMAT_R1_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_YCRCB_NORMAL] = CAP( 1, 1, 0, 1), - 
[GEN6_FORMAT_YCRCB_SWAPUVY] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_P2_UNORM_PALETTE0] = CAP(4.5, 4.5, 0, 0), - [GEN6_FORMAT_P2_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), - [GEN6_FORMAT_BC1_UNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_BC2_UNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_BC3_UNORM] = CAP( 1, 1, 0, 1), - [GEN6_FORMAT_BC4_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_BC5_UNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_BC1_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_BC2_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_BC3_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_MONO8] = CAP( 1, 0, 0, 0), - [GEN6_FORMAT_YCRCB_SWAPUV] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_YCRCB_SWAPY] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_DXT1_RGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_FXT1] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_BC4_SNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_BC5_SNORM] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R16G16B16_FLOAT] = CAP( 5, 5, 0, 0), - [GEN6_FORMAT_BC6H_SF16] = CAP( 7, 7, 0, 0), - [GEN6_FORMAT_BC7_UNORM] = CAP( 7, 7, 0, 0), - [GEN6_FORMAT_BC7_UNORM_SRGB] = CAP( 7, 7, 0, 0), - [GEN6_FORMAT_BC6H_UF16] = CAP( 7, 7, 0, 0), -#undef CAP -}; - -/* - * This table is based on: - * - * - the Sandy Bridge PRM, volume 4 part 1, page 88-97 - * - the Ivy Bridge PRM, volume 4 part 1, page 172, 252-253, and 277-278 - * - the Haswell PRM, volume 7, page 262-264 - */ -static const struct ilo_dp_cap ilo_dp_caps[] = { -#define CAP(rt_write, rt_write_blending, typed_write, media_color_processing) \ - { ILO_GEN(rt_write), ILO_GEN(rt_write_blending), ILO_GEN(typed_write), ILO_GEN(media_color_processing) } - [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1, 4.5, 7, 6), - [GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1, 6, 7, 0), - [GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R16G16B16A16_FLOAT] = 
CAP( 1, 1, 7, 0), - [GEN6_FORMAT_R32G32_FLOAT] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_R32G32_SINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R32G32_UINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1, 1, 7, 6), - [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1, 1, 7, 6), - [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB] = CAP( 0, 0, 0, 6), - [GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1, 1, 7, 6), - [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB] = CAP( 1, 1, 0, 6), - [GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1, 6, 7, 0), - [GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R16G16_UNORM] = CAP( 1, 4.5, 7, 0), - [GEN6_FORMAT_R16G16_SNORM] = CAP( 1, 6, 7, 0), - [GEN6_FORMAT_R16G16_SINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R16G16_UINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R16G16_FLOAT] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_B10G10R10A2_UNORM] = CAP( 1, 1, 7, 6), - [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB] = CAP( 1, 1, 0, 6), - [GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_R32_SINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R32_UINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R32_FLOAT] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_B8G8R8X8_UNORM] = CAP( 0, 0, 0, 6), - [GEN6_FORMAT_B5G6R5_UNORM] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_B5G6R5_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_B5G5R5A1_UNORM] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_B4G4R4A4_UNORM] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R8G8_UNORM] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_R8G8_SNORM] = CAP( 1, 6, 7, 0), - [GEN6_FORMAT_R8G8_SINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R8G8_UINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R16_UNORM] = CAP( 1, 4.5, 7, 7), - [GEN6_FORMAT_R16_SNORM] = CAP( 1, 6, 7, 0), - [GEN6_FORMAT_R16_SINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R16_UINT] = CAP( 1, 0, 7, 0), - 
[GEN6_FORMAT_R16_FLOAT] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_B5G5R5X1_UNORM] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_B5G5R5X1_UNORM_SRGB] = CAP( 1, 1, 0, 0), - [GEN6_FORMAT_R8_UNORM] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_R8_SNORM] = CAP( 1, 6, 7, 0), - [GEN6_FORMAT_R8_SINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_R8_UINT] = CAP( 1, 0, 7, 0), - [GEN6_FORMAT_A8_UNORM] = CAP( 1, 1, 7, 0), - [GEN6_FORMAT_YCRCB_NORMAL] = CAP( 1, 0, 0, 6), - [GEN6_FORMAT_YCRCB_SWAPUVY] = CAP( 1, 0, 0, 6), - [GEN6_FORMAT_YCRCB_SWAPUV] = CAP( 1, 0, 0, 6), - [GEN6_FORMAT_YCRCB_SWAPY] = CAP( 1, 0, 0, 6), -#undef CAP -}; - bool ilo_format_support_vb(const struct ilo_dev *dev, enum pipe_format format) @@ -298,10 +44,9 @@ ilo_format_support_sol(const struct ilo_dev *dev, enum pipe_format format) { const int idx = ilo_format_translate(dev, format, PIPE_BIND_STREAM_OUTPUT); - const struct ilo_sol_cap *cap = (idx >= 0 && idx < Elements(ilo_sol_caps)) ? - &ilo_sol_caps[idx] : NULL; - return (cap && cap->buffer && ilo_dev_gen(dev) >= cap->buffer); + return (idx >= 0 && ilo_state_surface_valid_format(dev, + ILO_STATE_SURFACE_ACCESS_DP_SVB, idx)); } bool @@ -309,20 +54,9 @@ ilo_format_support_sampler(const struct ilo_dev *dev, enum pipe_format format) { const int idx = ilo_format_translate(dev, format, PIPE_BIND_SAMPLER_VIEW); - const struct ilo_sampler_cap *cap = (idx >= 0 && - idx < Elements(ilo_sampler_caps)) ? 
&ilo_sampler_caps[idx] : NULL; - if (!cap || !cap->sampling) - return false; - - assert(!cap->filtering || cap->filtering >= cap->sampling); - - if (util_format_is_pure_integer(format)) - return (ilo_dev_gen(dev) >= cap->sampling); - else if (cap->filtering) - return (ilo_dev_gen(dev) >= cap->filtering); - else - return false; + return (idx >= 0 && ilo_state_surface_valid_format(dev, + ILO_STATE_SURFACE_ACCESS_SAMPLER, idx)); } bool @@ -330,20 +64,9 @@ ilo_format_support_rt(const struct ilo_dev *dev, enum pipe_format format) { const int idx = ilo_format_translate(dev, format, PIPE_BIND_RENDER_TARGET); - const struct ilo_dp_cap *cap = (idx >= 0 && idx < Elements(ilo_dp_caps)) ? - &ilo_dp_caps[idx] : NULL; - if (!cap || !cap->rt_write) - return false; - - assert(!cap->rt_write_blending || cap->rt_write_blending >= cap->rt_write); - - if (util_format_is_pure_integer(format)) - return (ilo_dev_gen(dev) >= cap->rt_write); - else if (cap->rt_write_blending) - return (ilo_dev_gen(dev) >= cap->rt_write_blending); - else - return false; + return (idx >= 0 && ilo_state_surface_valid_format(dev, + ILO_STATE_SURFACE_ACCESS_DP_RENDER, idx)); } bool diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.h b/src/gallium/drivers/ilo/core/ilo_state_surface.h index deb0b549ebd..9c025428d50 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_surface.h +++ b/src/gallium/drivers/ilo/core/ilo_state_surface.h @@ -94,6 +94,11 @@ struct ilo_state_surface { struct intel_bo *bo; }; +bool +ilo_state_surface_valid_format(const struct ilo_dev *dev, + enum ilo_state_surface_access access, + enum gen_surface_format format); + bool ilo_state_surface_init_for_null(struct ilo_state_surface *surf, const struct ilo_dev *dev); diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface_format.c b/src/gallium/drivers/ilo/core/ilo_state_surface_format.c new file mode 100644 index 00000000000..a40c1b84d17 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_surface_format.c @@ -0,0 +1,351 @@ 
+/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#include "genhw/genhw.h" +#include "ilo_state_surface.h" + +static bool +surface_valid_sampler_format(const struct ilo_dev *dev, + enum ilo_state_surface_access access, + enum gen_surface_format format) +{ + /* + * This table is based on: + * + * - the Sandy Bridge PRM, volume 4 part 1, page 88-97 + * - the Ivy Bridge PRM, volume 4 part 1, page 84-87 + */ + static const struct sampler_cap { + int sampling; + int filtering; + int shadow_map; + int chroma_key; + } caps[] = { +#define CAP(sampling, filtering, shadow_map, chroma_key) \ + { ILO_GEN(sampling), ILO_GEN(filtering), ILO_GEN(shadow_map), ILO_GEN(chroma_key) } + [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1, 5, 0, 0), + [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R32G32B32X32_FLOAT] = CAP( 1, 5, 0, 0), + [GEN6_FORMAT_R32G32B32_FLOAT] = CAP( 1, 5, 0, 0), + [GEN6_FORMAT_R32G32B32_SINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R32G32B32_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R32G32_FLOAT] = CAP( 1, 5, 0, 0), + [GEN6_FORMAT_R32G32_SINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R32G32_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS] = CAP( 1, 5, 1, 0), + [GEN6_FORMAT_X32_TYPELESS_G8X24_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_L32A32_FLOAT] = CAP( 1, 5, 0, 0), + [GEN6_FORMAT_R16G16B16X16_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R16G16B16X16_FLOAT] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_A32X32_FLOAT] = CAP( 1, 5, 0, 0), + [GEN6_FORMAT_L32X32_FLOAT] = CAP( 1, 5, 0, 0), + [GEN6_FORMAT_I32X32_FLOAT] = CAP( 1, 5, 0, 0), + [GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0), + 
[GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R16G16_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R16G16_SNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R16G16_SINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R16G16_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R16G16_FLOAT] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_B10G10R10A2_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R32_SINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R32_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R32_FLOAT] = CAP( 1, 5, 1, 0), + [GEN6_FORMAT_R24_UNORM_X8_TYPELESS] = CAP( 1, 5, 1, 0), + [GEN6_FORMAT_X24_TYPELESS_G8_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_L16A16_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_I24X8_UNORM] = CAP( 1, 5, 1, 0), + [GEN6_FORMAT_L24X8_UNORM] = CAP( 1, 5, 1, 0), + [GEN6_FORMAT_A24X8_UNORM] = CAP( 1, 5, 1, 0), + [GEN6_FORMAT_I32_FLOAT] = CAP( 1, 5, 1, 0), + [GEN6_FORMAT_L32_FLOAT] = CAP( 1, 5, 1, 0), + [GEN6_FORMAT_A32_FLOAT] = CAP( 1, 5, 1, 0), + [GEN6_FORMAT_B8G8R8X8_UNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_B8G8R8X8_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R8G8B8X8_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R8G8B8X8_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R9G9B9E5_SHAREDEXP] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_B10G10R10X2_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_L16A16_FLOAT] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_B5G6R5_UNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_B5G6R5_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_B5G5R5A1_UNORM] = CAP( 1, 1, 0, 1), 
+ [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_B4G4R4A4_UNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R8G8_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R8G8_SNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_R8G8_SINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R8G8_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R16_UNORM] = CAP( 1, 1, 1, 0), + [GEN6_FORMAT_R16_SNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R16_SINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R16_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R16_FLOAT] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_A8P8_UNORM_PALETTE0] = CAP( 5, 5, 0, 0), + [GEN6_FORMAT_A8P8_UNORM_PALETTE1] = CAP( 5, 5, 0, 0), + [GEN6_FORMAT_I16_UNORM] = CAP( 1, 1, 1, 0), + [GEN6_FORMAT_L16_UNORM] = CAP( 1, 1, 1, 0), + [GEN6_FORMAT_A16_UNORM] = CAP( 1, 1, 1, 0), + [GEN6_FORMAT_L8A8_UNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_I16_FLOAT] = CAP( 1, 1, 1, 0), + [GEN6_FORMAT_L16_FLOAT] = CAP( 1, 1, 1, 0), + [GEN6_FORMAT_A16_FLOAT] = CAP( 1, 1, 1, 0), + [GEN6_FORMAT_L8A8_UNORM_SRGB] = CAP(4.5, 4.5, 0, 0), + [GEN6_FORMAT_R5G5_SNORM_B6_UNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_P8A8_UNORM_PALETTE0] = CAP( 5, 5, 0, 0), + [GEN6_FORMAT_P8A8_UNORM_PALETTE1] = CAP( 5, 5, 0, 0), + [GEN6_FORMAT_R8_UNORM] = CAP( 1, 1, 0, 4.5), + [GEN6_FORMAT_R8_SNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R8_SINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_R8_UINT] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_A8_UNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_I8_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_L8_UNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_P4A4_UNORM_PALETTE0] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_A4P4_UNORM_PALETTE0] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_P8_UNORM_PALETTE0] = CAP(4.5, 4.5, 0, 0), + [GEN6_FORMAT_L8_UNORM_SRGB] = CAP(4.5, 4.5, 0, 0), + [GEN6_FORMAT_P8_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), + [GEN6_FORMAT_P4A4_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), + [GEN6_FORMAT_A4P4_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), + [GEN6_FORMAT_DXT1_RGB_SRGB] = CAP(4.5, 4.5, 0, 0), 
+ [GEN6_FORMAT_R1_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_YCRCB_NORMAL] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_YCRCB_SWAPUVY] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_P2_UNORM_PALETTE0] = CAP(4.5, 4.5, 0, 0), + [GEN6_FORMAT_P2_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), + [GEN6_FORMAT_BC1_UNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_BC2_UNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_BC3_UNORM] = CAP( 1, 1, 0, 1), + [GEN6_FORMAT_BC4_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_BC5_UNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_BC1_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_BC2_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_BC3_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_MONO8] = CAP( 1, 0, 0, 0), + [GEN6_FORMAT_YCRCB_SWAPUV] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_YCRCB_SWAPY] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_DXT1_RGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_FXT1] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_BC4_SNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_BC5_SNORM] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R16G16B16_FLOAT] = CAP( 5, 5, 0, 0), + [GEN6_FORMAT_BC6H_SF16] = CAP( 7, 7, 0, 0), + [GEN6_FORMAT_BC7_UNORM] = CAP( 7, 7, 0, 0), + [GEN6_FORMAT_BC7_UNORM_SRGB] = CAP( 7, 7, 0, 0), + [GEN6_FORMAT_BC6H_UF16] = CAP( 7, 7, 0, 0), +#undef CAP + }; + + ILO_DEV_ASSERT(dev, 6, 8); + + return (format < ARRAY_SIZE(caps) && caps[format].sampling && + ilo_dev_gen(dev) >= caps[format].sampling); +} + +static bool +surface_valid_dp_format(const struct ilo_dev *dev, + enum ilo_state_surface_access access, + enum gen_surface_format format) +{ + /* + * This table is based on: + * + * - the Sandy Bridge PRM, volume 4 part 1, page 88-97 + * - the Ivy Bridge PRM, volume 4 part 1, page 172, 252-253, and 277-278 + * - the Haswell PRM, volume 7, page 262-264 + */ + static const struct dp_cap { + int rt_write; + int rt_write_blending; + int typed_write; + int media_color_processing; + } caps[] = { +#define CAP(rt_write, rt_write_blending, typed_write, media_color_processing) \ + { ILO_GEN(rt_write), ILO_GEN(rt_write_blending), 
ILO_GEN(typed_write), ILO_GEN(media_color_processing) } + [GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1, 4.5, 7, 6), + [GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1, 6, 7, 0), + [GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_R32G32_FLOAT] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_R32G32_SINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R32G32_UINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1, 1, 7, 6), + [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1, 1, 7, 6), + [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB] = CAP( 0, 0, 0, 6), + [GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1, 1, 7, 6), + [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB] = CAP( 1, 1, 0, 6), + [GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1, 6, 7, 0), + [GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R16G16_UNORM] = CAP( 1, 4.5, 7, 0), + [GEN6_FORMAT_R16G16_SNORM] = CAP( 1, 6, 7, 0), + [GEN6_FORMAT_R16G16_SINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R16G16_UINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R16G16_FLOAT] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_B10G10R10A2_UNORM] = CAP( 1, 1, 7, 6), + [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB] = CAP( 1, 1, 0, 6), + [GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_R32_SINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R32_UINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R32_FLOAT] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_B8G8R8X8_UNORM] = CAP( 0, 0, 0, 6), + [GEN6_FORMAT_B5G6R5_UNORM] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_B5G6R5_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_B5G5R5A1_UNORM] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB] = CAP( 1, 1, 0, 0), + 
[GEN6_FORMAT_B4G4R4A4_UNORM] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R8G8_UNORM] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_R8G8_SNORM] = CAP( 1, 6, 7, 0), + [GEN6_FORMAT_R8G8_SINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R8G8_UINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R16_UNORM] = CAP( 1, 4.5, 7, 7), + [GEN6_FORMAT_R16_SNORM] = CAP( 1, 6, 7, 0), + [GEN6_FORMAT_R16_SINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R16_UINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R16_FLOAT] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_B5G5R5X1_UNORM] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_B5G5R5X1_UNORM_SRGB] = CAP( 1, 1, 0, 0), + [GEN6_FORMAT_R8_UNORM] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_R8_SNORM] = CAP( 1, 6, 7, 0), + [GEN6_FORMAT_R8_SINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_R8_UINT] = CAP( 1, 0, 7, 0), + [GEN6_FORMAT_A8_UNORM] = CAP( 1, 1, 7, 0), + [GEN6_FORMAT_YCRCB_NORMAL] = CAP( 1, 0, 0, 6), + [GEN6_FORMAT_YCRCB_SWAPUVY] = CAP( 1, 0, 0, 6), + [GEN6_FORMAT_YCRCB_SWAPUV] = CAP( 1, 0, 0, 6), + [GEN6_FORMAT_YCRCB_SWAPY] = CAP( 1, 0, 0, 6), +#undef CAP + }; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (format >= ARRAY_SIZE(caps)) + return false; + + switch (access) { + case ILO_STATE_SURFACE_ACCESS_DP_RENDER: + return (caps[format].rt_write && + ilo_dev_gen(dev) >= caps[format].rt_write); + case ILO_STATE_SURFACE_ACCESS_DP_TYPED: + return (caps[format].typed_write && + ilo_dev_gen(dev) >= caps[format].typed_write); + case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED: + return (format == GEN6_FORMAT_RAW); + case ILO_STATE_SURFACE_ACCESS_DP_DATA: + /* ignored, but can it be raw? 
*/ + assert(format != GEN6_FORMAT_RAW); + return true; + default: + return false; + } +} + +static bool +surface_valid_svb_format(const struct ilo_dev *dev, + enum gen_surface_format format) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * This table is based on: + * + * - the Sandy Bridge PRM, volume 4 part 1, page 88-97 + * - the Ivy Bridge PRM, volume 2 part 1, page 195 + * - the Haswell PRM, volume 7, page 535 + */ + switch (format) { + case GEN6_FORMAT_R32G32B32A32_FLOAT: + case GEN6_FORMAT_R32G32B32A32_SINT: + case GEN6_FORMAT_R32G32B32A32_UINT: + case GEN6_FORMAT_R32G32B32_FLOAT: + case GEN6_FORMAT_R32G32B32_SINT: + case GEN6_FORMAT_R32G32B32_UINT: + case GEN6_FORMAT_R32G32_FLOAT: + case GEN6_FORMAT_R32G32_SINT: + case GEN6_FORMAT_R32G32_UINT: + case GEN6_FORMAT_R32_SINT: + case GEN6_FORMAT_R32_UINT: + case GEN6_FORMAT_R32_FLOAT: + return true; + default: + return false; + } +} + +bool +ilo_state_surface_valid_format(const struct ilo_dev *dev, + enum ilo_state_surface_access access, + enum gen_surface_format format) +{ + bool valid; + + switch (access) { + case ILO_STATE_SURFACE_ACCESS_SAMPLER: + valid = surface_valid_sampler_format(dev, access, format); + break; + case ILO_STATE_SURFACE_ACCESS_DP_RENDER: + case ILO_STATE_SURFACE_ACCESS_DP_TYPED: + case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED: + case ILO_STATE_SURFACE_ACCESS_DP_DATA: + valid = surface_valid_dp_format(dev, access, format); + break; + case ILO_STATE_SURFACE_ACCESS_DP_SVB: + valid = surface_valid_svb_format(dev, format); + break; + default: + valid = false; + break; + } + + return valid; +} From 513bc5d90b8e9237bd6a04da5d0dee175ff134f6 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 22 Jun 2015 14:06:13 +0800 Subject: [PATCH 777/834] ilo: move ilo_format.[ch] out of core They provide PIPE_FORMAT_x to GEN6_FORMAT_x translation as well as some convenient helpers. Move them out of core. 
--- src/gallium/drivers/ilo/Makefile.sources | 4 ++-- src/gallium/drivers/ilo/{core => }/ilo_format.c | 4 ++-- src/gallium/drivers/ilo/{core => }/ilo_format.h | 4 ++-- src/gallium/drivers/ilo/ilo_screen.c | 2 +- src/gallium/drivers/ilo/ilo_state.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) rename src/gallium/drivers/ilo/{core => }/ilo_format.c (99%) rename src/gallium/drivers/ilo/{core => }/ilo_format.h (99%) diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index ed024e3a866..e1bbb9a0781 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -15,8 +15,6 @@ C_SOURCES := \ core/ilo_debug.h \ core/ilo_dev.c \ core/ilo_dev.h \ - core/ilo_format.c \ - core/ilo_format.h \ core/ilo_image.c \ core/ilo_image.h \ core/ilo_state_cc.c \ @@ -60,6 +58,8 @@ C_SOURCES := \ ilo_cp.h \ ilo_draw.c \ ilo_draw.h \ + ilo_format.c \ + ilo_format.h \ ilo_gpgpu.c \ ilo_gpgpu.h \ ilo_public.h \ diff --git a/src/gallium/drivers/ilo/core/ilo_format.c b/src/gallium/drivers/ilo/ilo_format.c similarity index 99% rename from src/gallium/drivers/ilo/core/ilo_format.c rename to src/gallium/drivers/ilo/ilo_format.c index b96f71c0021..ca7e6b55ca1 100644 --- a/src/gallium/drivers/ilo/core/ilo_format.c +++ b/src/gallium/drivers/ilo/ilo_format.c @@ -26,8 +26,8 @@ */ #include "genhw/genhw.h" -#include "ilo_state_surface.h" -#include "ilo_state_vf.h" +#include "core/ilo_state_surface.h" +#include "core/ilo_state_vf.h" #include "ilo_format.h" bool diff --git a/src/gallium/drivers/ilo/core/ilo_format.h b/src/gallium/drivers/ilo/ilo_format.h similarity index 99% rename from src/gallium/drivers/ilo/core/ilo_format.h rename to src/gallium/drivers/ilo/ilo_format.h index 6b73ea1dad7..4e955c09c14 100644 --- a/src/gallium/drivers/ilo/core/ilo_format.h +++ b/src/gallium/drivers/ilo/ilo_format.h @@ -29,8 +29,8 @@ #define ILO_FORMAT_H #include "genhw/genhw.h" -#include "ilo_core.h" -#include "ilo_dev.h" + 
+#include "ilo_common.h" bool ilo_format_support_vb(const struct ilo_dev *dev, diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index b75a2590b2b..94105559b80 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -31,10 +31,10 @@ #include "vl/vl_decoder.h" #include "vl/vl_video_buffer.h" #include "genhw/genhw.h" /* for GEN6_REG_TIMESTAMP */ -#include "core/ilo_format.h" #include "core/intel_winsys.h" #include "ilo_context.h" +#include "ilo_format.h" #include "ilo_resource.h" #include "ilo_transfer.h" /* for ILO_TRANSFER_MAP_BUFFER_ALIGNMENT */ #include "ilo_public.h" diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 62e31809fb7..4252dbe5613 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -26,7 +26,6 @@ */ #include "core/ilo_builder_3d.h" /* for gen6_3d_translate_pipe_prim() */ -#include "core/ilo_format.h" #include "util/u_dual_blend.h" #include "util/u_dynarray.h" #include "util/u_framebuffer.h" @@ -35,6 +34,7 @@ #include "util/u_upload_mgr.h" #include "ilo_context.h" +#include "ilo_format.h" #include "ilo_resource.h" #include "ilo_shader.h" #include "ilo_state.h" From 58f95b332d0cbad226f5bb2e96cd0cad8864fe79 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 22 Jun 2015 14:15:52 +0800 Subject: [PATCH 778/834] ilo: align vertex buffer size in buf_create() With ilo_format.[ch] moved out of core, the aligning of vertex buffers does not belong to core anymore. 
--- src/gallium/drivers/ilo/core/ilo_buffer.h | 17 ----------------- src/gallium/drivers/ilo/ilo_resource.c | 22 ++++++++++++++++++++-- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_buffer.h b/src/gallium/drivers/ilo/core/ilo_buffer.h index 99c7b014736..ca3c61ff890 100644 --- a/src/gallium/drivers/ilo/core/ilo_buffer.h +++ b/src/gallium/drivers/ilo/core/ilo_buffer.h @@ -59,23 +59,6 @@ ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev, */ if (bind & PIPE_BIND_SAMPLER_VIEW) buf->bo_size = align(buf->bo_size, 256) + 16; - - if ((bind & PIPE_BIND_VERTEX_BUFFER) && ilo_dev_gen(dev) < ILO_GEN(7.5)) { - /* - * As noted in ilo_format_translate(), we treat some 3-component formats - * as 4-component formats to work around hardware limitations. Imagine - * the case where the vertex buffer holds a single - * PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6. The - * hardware would fail to fetch it at boundary check because the vertex - * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex - * and that takes at least 8 bytes. - * - * For the workaround to work, we should add 2 to the bo size. But that - * would waste a page when the bo size is already page aligned. Let's - * round it to page size for now and revisit this when needed. 
- */ - buf->bo_size = align(buf->bo_size, 4096); - } } #endif /* ILO_BUFFER_H */ diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c index b6f5d26da5b..be9fd10a84c 100644 --- a/src/gallium/drivers/ilo/ilo_resource.c +++ b/src/gallium/drivers/ilo/ilo_resource.c @@ -443,6 +443,7 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ) { const struct ilo_screen *is = ilo_screen(screen); struct ilo_buffer_resource *buf; + unsigned size; buf = CALLOC_STRUCT(ilo_buffer_resource); if (!buf) @@ -452,8 +453,25 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ) buf->base.screen = screen; pipe_reference_init(&buf->base.reference, 1); - ilo_buffer_init(&buf->buffer, &is->dev, - templ->width0, templ->bind, templ->flags); + size = templ->width0; + + /* + * As noted in ilo_format_translate(), we treat some 3-component formats as + * 4-component formats to work around hardware limitations. Imagine the + * case where the vertex buffer holds a single PIPE_FORMAT_R16G16B16_FLOAT + * vertex, and buf->bo_size is 6. The hardware would fail to fetch it at + * boundary check because the vertex buffer is expected to hold a + * PIPE_FORMAT_R16G16B16A16_FLOAT vertex and that takes at least 8 bytes. + * + * For the workaround to work, we should add 2 to the bo size. But that + * would waste a page when the bo size is already page aligned. Let's + * round it to page size for now and revisit this when needed. 
+ */ + if ((templ->bind & PIPE_BIND_VERTEX_BUFFER) && + ilo_dev_gen(&is->dev) < ILO_GEN(7.5)) + size = align(size, 4096); + + ilo_buffer_init(&buf->buffer, &is->dev, size, templ->bind, templ->flags); if (buf->buffer.bo_size < templ->width0 || buf->buffer.bo_size > ilo_max_resource_size || From 878714142999ca6a6aa03d962e01da94d44c8574 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 22 Jun 2015 14:27:19 +0800 Subject: [PATCH 779/834] ilo: emit 3DPRIMITIVE from gen6_3dprimitive_info It allows us to remove ilo_ib_state::draw_start_offset and ILO_PRIM_RECTANGLES. gen6_3d_translate_pipe_prim() is also replaced by ilo_translate_draw_mode(). --- src/gallium/drivers/ilo/core/ilo_builder_3d.h | 54 +++++++++-------- .../drivers/ilo/core/ilo_builder_3d_top.h | 29 ---------- src/gallium/drivers/ilo/core/ilo_core.h | 3 - src/gallium/drivers/ilo/ilo_blitter.h | 2 +- .../drivers/ilo/ilo_blitter_rectlist.c | 6 +- src/gallium/drivers/ilo/ilo_render_gen.h | 9 +-- src/gallium/drivers/ilo/ilo_render_gen6.c | 4 +- src/gallium/drivers/ilo/ilo_render_gen7.c | 4 +- src/gallium/drivers/ilo/ilo_render_gen8.c | 5 +- src/gallium/drivers/ilo/ilo_state.c | 58 ++++++++++++++----- src/gallium/drivers/ilo/ilo_state.h | 4 +- 11 files changed, 91 insertions(+), 87 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d.h b/src/gallium/drivers/ilo/core/ilo_builder_3d.h index 8d8a79599bd..fb8b53cbe23 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d.h @@ -35,41 +35,45 @@ #include "ilo_builder_3d_top.h" #include "ilo_builder_3d_bottom.h" +struct gen6_3dprimitive_info { + enum gen_3dprim_type topology; + bool indexed; + + uint32_t vertex_count; + uint32_t vertex_start; + uint32_t instance_count; + uint32_t instance_start; + int32_t vertex_base; +}; + static inline void gen6_3DPRIMITIVE(struct ilo_builder *builder, - const struct pipe_draw_info *info, - int64_t start_offset) + const struct gen6_3dprimitive_info *info) { const 
uint8_t cmd_len = 6; - const int prim = gen6_3d_translate_pipe_prim(info->mode); - const int vb_access = (info->indexed) ? - GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL; uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 6); ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | - vb_access | - prim << GEN6_3DPRIM_DW0_TYPE__SHIFT | - (cmd_len - 2); - dw[1] = info->count; - dw[2] = info->start + start_offset; + dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2) | + info->topology << GEN6_3DPRIM_DW0_TYPE__SHIFT; + if (info->indexed) + dw[0] |= GEN6_3DPRIM_DW0_ACCESS_RANDOM; + + dw[1] = info->vertex_count; + dw[2] = info->vertex_start; dw[3] = info->instance_count; - dw[4] = info->start_instance; - dw[5] = info->index_bias; + dw[4] = info->instance_start; + dw[5] = info->vertex_base; } static inline void gen7_3DPRIMITIVE(struct ilo_builder *builder, - const struct pipe_draw_info *info, - int64_t start_offset) + const struct gen6_3dprimitive_info *info) { const uint8_t cmd_len = 7; - const int prim = gen6_3d_translate_pipe_prim(info->mode); - const int vb_access = (info->indexed) ? 
- GEN7_3DPRIM_DW1_ACCESS_RANDOM : GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL; uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 8); @@ -77,12 +81,16 @@ gen7_3DPRIMITIVE(struct ilo_builder *builder, ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2); - dw[1] = vb_access | prim; - dw[2] = info->count; - dw[3] = info->start + start_offset; + + dw[1] = info->topology << GEN7_3DPRIM_DW1_TYPE__SHIFT; + if (info->indexed) + dw[1] |= GEN7_3DPRIM_DW1_ACCESS_RANDOM; + + dw[2] = info->vertex_count; + dw[3] = info->vertex_start; dw[4] = info->instance_count; - dw[5] = info->start_instance; - dw[6] = info->index_bias; + dw[5] = info->instance_start; + dw[6] = info->vertex_base; } #endif /* ILO_BUILDER_3D_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 42d171fc0d2..8d30095e6f6 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -217,35 +217,6 @@ gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder, ilo_builder_batch_write(builder, cmd_len, &dw0); } -/** - * Translate a pipe primitive type to the matching hardware primitive type. 
- */ -static inline int -gen6_3d_translate_pipe_prim(unsigned prim) -{ - static const int prim_mapping[ILO_PRIM_MAX] = { - [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST, - [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST, - [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP, - [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP, - [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST, - [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP, - [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN, - [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST, - [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP, - [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON, - [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ, - [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ, - [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ, - [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ, - [ILO_PRIM_RECTANGLES] = GEN6_3DPRIM_RECTLIST, - }; - - assert(prim_mapping[prim]); - - return prim_mapping[prim]; -} - static inline void gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder, enum gen_3dprim_type topology) diff --git a/src/gallium/drivers/ilo/core/ilo_core.h b/src/gallium/drivers/ilo/core/ilo_core.h index 3587d3930f3..0a7f7d9d3fe 100644 --- a/src/gallium/drivers/ilo/core/ilo_core.h +++ b/src/gallium/drivers/ilo/core/ilo_core.h @@ -40,7 +40,4 @@ #include "util/u_memory.h" #include "util/u_pointer.h" -#define ILO_PRIM_RECTANGLES PIPE_PRIM_MAX -#define ILO_PRIM_MAX (PIPE_PRIM_MAX + 1) - #endif /* ILO_CORE_H */ diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index 6af6046e1a9..4eba8481c28 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -58,7 +58,7 @@ struct ilo_blitter { bool initialized; float vertices[3][2]; - struct pipe_draw_info draw; + struct gen6_3dprimitive_info draw_info; uint32_t vf_data[4]; struct ilo_state_vf vf; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c 
b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index afdb0377824..13c8f500680 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -45,9 +45,9 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter) return true; /* a rectangle has 3 vertices in a RECTLIST */ - util_draw_init_info(&blitter->draw); - blitter->draw.mode = ILO_PRIM_RECTANGLES; - blitter->draw.count = 3; + blitter->draw_info.topology = GEN6_3DPRIM_RECTLIST; + blitter->draw_info.vertex_count = 3; + blitter->draw_info.instance_count = 1; memset(&elem, 0, sizeof(elem)); /* only vertex X and Y */ diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index aae4ef2f373..6b133750043 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -389,11 +389,8 @@ ilo_render_pipe_control(struct ilo_render *r, uint32_t dw1) */ static inline void ilo_render_3dprimitive(struct ilo_render *r, - const struct pipe_draw_info *info, - const struct ilo_ib_state *ib) + const struct gen6_3dprimitive_info *info) { - const int64_t start_offset = (info->indexed) ? 
ib->draw_start_offset : 0; - ILO_DEV_ASSERT(r->dev, 6, 8); if (r->state.deferred_pipe_control_dw1) @@ -401,9 +398,9 @@ ilo_render_3dprimitive(struct ilo_render *r, /* 3DPRIMITIVE */ if (ilo_dev_gen(r->dev) >= ILO_GEN(7)) - gen7_3DPRIMITIVE(r->builder, info, start_offset); + gen7_3DPRIMITIVE(r->builder, info); else - gen6_3DPRIMITIVE(r->builder, info, start_offset); + gen6_3DPRIMITIVE(r->builder, info); r->state.current_pipe_control_dw1 = 0; assert(!r->state.deferred_pipe_control_dw1); diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index b2bc2dcface..c1f759f3043 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -806,7 +806,7 @@ ilo_render_emit_draw_commands_gen6(struct ilo_render *render, gen6_draw_sf_rect(render, vec, session); gen6_draw_vf(render, vec, session); - ilo_render_3dprimitive(render, vec->draw, &vec->ib); + ilo_render_3dprimitive(render, &vec->draw_info); } static void @@ -926,7 +926,7 @@ ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r, gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0, blitter->fb.width, blitter->fb.height); - ilo_render_3dprimitive(r, &blitter->draw, NULL); + ilo_render_3dprimitive(r, &blitter->draw_info); } int diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 4c54edeeb96..6623a8bcb43 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -649,7 +649,7 @@ ilo_render_emit_draw_commands_gen7(struct ilo_render *render, gen6_draw_sf_rect(render, vec, session); gen6_draw_vf(render, vec, session); - ilo_render_3dprimitive(render, vec->draw, &vec->ib); + ilo_render_3dprimitive(render, &vec->draw_info); } static void @@ -804,7 +804,7 @@ ilo_render_emit_rectlist_commands_gen7(struct ilo_render *r, if (ilo_dev_gen(r->dev) == ILO_GEN(7)) gen7_wa_post_ps_and_later(r); - ilo_render_3dprimitive(r, &blitter->draw, NULL); + 
ilo_render_3dprimitive(r, &blitter->draw_info); } int diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index f86871f852f..65494b4058a 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -220,8 +220,7 @@ gen8_draw_vf(struct ilo_render *r, if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS) gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &vec->ve->vf); - gen8_3DSTATE_VF_TOPOLOGY(r->builder, - gen6_3d_translate_pipe_prim(vec->draw->mode)); + gen8_3DSTATE_VF_TOPOLOGY(r->builder, vec->draw_info.topology); if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF_INSTANCING) { const uint8_t attr_count = ilo_state_vf_get_attr_count(&vec->ve->vf); @@ -270,7 +269,7 @@ ilo_render_emit_draw_commands_gen8(struct ilo_render *render, gen6_draw_sf_rect(render, vec, session); gen8_draw_vf(render, vec, session); - ilo_render_3dprimitive(render, vec->draw, &vec->ib); + ilo_render_3dprimitive(render, &vec->draw_info); } int diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 4252dbe5613..63534f33fa7 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -25,7 +25,6 @@ * Chia-I Wu */ -#include "core/ilo_builder_3d.h" /* for gen6_3d_translate_pipe_prim() */ #include "util/u_dual_blend.h" #include "util/u_dynarray.h" #include "util/u_framebuffer.h" @@ -39,6 +38,34 @@ #include "ilo_shader.h" #include "ilo_state.h" +/** + * Translate a pipe primitive type to the matching hardware primitive type. 
+ */ +static enum gen_3dprim_type +ilo_translate_draw_mode(unsigned mode) +{ + static const enum gen_3dprim_type prim_mapping[PIPE_PRIM_MAX] = { + [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST, + [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST, + [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP, + [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP, + [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST, + [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN, + [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST, + [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP, + [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON, + [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ, + [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ, + [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ, + [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ, + }; + + assert(prim_mapping[mode]); + + return prim_mapping[mode]; +} + static enum gen_index_format ilo_translate_index_size(unsigned index_size) { @@ -386,6 +413,7 @@ finalize_index_buffer(struct ilo_context *ilo) vec->ib.state.offset % vec->ib.state.index_size)); struct pipe_resource *current_hw_res = NULL; struct ilo_state_index_buffer_info info; + int64_t vertex_start_bias = 0; if (!(vec->dirty & ILO_DIRTY_IB) && !need_upload) return; @@ -410,25 +438,23 @@ finalize_index_buffer(struct ilo_context *ilo) /* the HW offset should be aligned */ assert(hw_offset % vec->ib.state.index_size == 0); - vec->ib.draw_start_offset = hw_offset / vec->ib.state.index_size; + vertex_start_bias = hw_offset / vec->ib.state.index_size; /* * INDEX[vec->draw->start] in the original buffer is INDEX[0] in the HW * resource */ - vec->ib.draw_start_offset -= vec->draw->start; + vertex_start_bias -= vec->draw->start; } else { pipe_resource_reference(&vec->ib.hw_resource, vec->ib.state.buffer); /* note that index size may be zero when the draw is not indexed */ - if (vec->draw->indexed) { - vec->ib.draw_start_offset = - 
vec->ib.state.offset / vec->ib.state.index_size; - } else { - vec->ib.draw_start_offset = 0; - } + if (vec->draw->indexed) + vertex_start_bias = vec->ib.state.offset / vec->ib.state.index_size; } + vec->draw_info.vertex_start += vertex_start_bias; + /* treat the IB as clean if the HW states do not change */ if (vec->ib.hw_resource == current_hw_res && vec->ib.hw_index_size == vec->ib.state.index_size) @@ -456,8 +482,6 @@ finalize_vertex_elements(struct ilo_context *ilo) const struct ilo_dev *dev = ilo->dev; struct ilo_state_vector *vec = &ilo->state_vector; struct ilo_ve_state *ve = vec->ve; - const enum gen_3dprim_type topology = - gen6_3d_translate_pipe_prim(vec->draw->mode); const bool last_element_edge_flag = (vec->vs && ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG)); const bool prepend_vertexid = (vec->vs && @@ -469,14 +493,14 @@ finalize_vertex_elements(struct ilo_context *ilo) ilo_translate_index_size(vec->ib.state.index_size) : GEN6_INDEX_DWORD; /* check for non-orthogonal states */ - if (ve->vf_params.cv_topology != topology || + if (ve->vf_params.cv_topology != vec->draw_info.topology || ve->vf_params.prepend_vertexid != prepend_vertexid || ve->vf_params.prepend_instanceid != prepend_instanceid || ve->vf_params.last_element_edge_flag != last_element_edge_flag || ve->vf_params.cv_index_format != index_format || ve->vf_params.cut_index_enable != vec->draw->primitive_restart || ve->vf_params.cut_index != vec->draw->restart_index) { - ve->vf_params.cv_topology = topology; + ve->vf_params.cv_topology = vec->draw_info.topology; ve->vf_params.prepend_vertexid = prepend_vertexid; ve->vf_params.prepend_instanceid = prepend_instanceid; ve->vf_params.last_element_edge_flag = last_element_edge_flag; @@ -769,6 +793,14 @@ ilo_finalize_3d_states(struct ilo_context *ilo, { ilo->state_vector.draw = draw; + ilo->state_vector.draw_info.topology = ilo_translate_draw_mode(draw->mode); + ilo->state_vector.draw_info.indexed = draw->indexed; + 
ilo->state_vector.draw_info.vertex_count = draw->count; + ilo->state_vector.draw_info.vertex_start = draw->start; + ilo->state_vector.draw_info.instance_count = draw->instance_count; + ilo->state_vector.draw_info.instance_start = draw->start_instance; + ilo->state_vector.draw_info.vertex_base = draw->index_bias; + finalize_blend(ilo); finalize_shader_states(&ilo->state_vector); finalize_constant_buffers(ilo); diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 537e5db120b..3e6fd8a2554 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -28,6 +28,7 @@ #ifndef ILO_STATE_H #define ILO_STATE_H +#include "core/ilo_builder_3d.h" /* for gen6_3dprimitive_info */ #include "core/ilo_state_cc.h" #include "core/ilo_state_compute.h" #include "core/ilo_state_raster.h" @@ -169,8 +170,6 @@ struct ilo_ib_state { struct pipe_resource *hw_resource; unsigned hw_index_size; struct ilo_state_index_buffer ib; - /* an offset to be added to pipe_draw_info::start */ - int64_t draw_start_offset; }; struct ilo_cbuf_cso { @@ -339,6 +338,7 @@ struct ilo_global_binding { struct ilo_state_vector { const struct pipe_draw_info *draw; + struct gen6_3dprimitive_info draw_info; uint32_t dirty; From 2b07b8d104a93c26ac92edb3ba72328cdc2dcb52 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 22 Jun 2015 08:29:49 -0600 Subject: [PATCH 780/834] mesa: use _mesa_lookup_enum_by_nr() in print_array() Print GL_FLOAT, etc. instead of hex value. 
Reviewed-by: Ilia Mirkin --- src/mesa/main/varray.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index 7389037ae85..ebdd9eaf02e 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -2309,10 +2309,10 @@ print_array(const char *name, GLint index, const struct gl_client_array *array) fprintf(stderr, " %s[%d]: ", name, index); else fprintf(stderr, " %s: ", name); - fprintf(stderr, "Ptr=%p, Type=0x%x, Size=%d, ElemSize=%u, Stride=%d, Buffer=%u(Size %lu)\n", - array->Ptr, array->Type, array->Size, - array->_ElementSize, array->StrideB, - array->BufferObj->Name, (unsigned long) array->BufferObj->Size); + fprintf(stderr, "Ptr=%p, Type=%s, Size=%d, ElemSize=%u, Stride=%d, Buffer=%u(Size %lu)\n", + array->Ptr, _mesa_lookup_enum_by_nr(array->Type), array->Size, + array->_ElementSize, array->StrideB, array->BufferObj->Name, + (unsigned long) array->BufferObj->Size); } From 90754d2df05eafe1a3ee3cd9bb1611a19099fc49 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 3 Jun 2015 21:35:51 -0700 Subject: [PATCH 781/834] i965/gen9: Implement Push Constant Buffer workaround This implements a workaround (exact excerpt as a comment in the code). The docs specify [clearly, after you struggle for a while] that the offset isn't relative to state base. This actually makes sense. This fixes hangs on SKL. Buffer #0 is meant to be used for normal uniforms. Buffer #1 is typically used for gather constants when using RS. Buffer #1-#3 could be used to push a bunch of UBO data which would just be somewhere in memory, and not relative to the dynamic state. NOTE: I've moved away from the ternary operator for the new gen9 conditions. Admittedly it's probably not great to do this, but I really want to fix this all up in the subsequent patch and doing it here makes that diff a lot nicer. 
I want to split out the gen8/9 code to make the function a bit more readable, but to keep this easily cherry-pickable I am doing this fix first. If we decide not to merge the cleanup patch then I can revisit this. Cc: "10.5 10.6" Signed-off-by: Ben Widawsky Reviewed-by: Anuj Phogat Tested-by: Valtteri Rantala --- src/mesa/drivers/dri/i965/gen7_vs_state.c | 48 +++++++++++++++++++---- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 278b3ec6d21..4b17d06fa83 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -43,20 +43,54 @@ gen7_upload_constant_state(struct brw_context *brw, int dwords = brw->gen >= 8 ? 11 : 7; BEGIN_BATCH(dwords); OUT_BATCH(opcode << 16 | (dwords - 2)); - OUT_BATCH(active ? stage_state->push_const_size : 0); - OUT_BATCH(0); + + /* Workaround for SKL+ (we use option #2 until we have a need for more + * constant buffers). This comes from the documentation for 3DSTATE_CONSTANT_* + * + * The driver must ensure The following case does not occur without a flush + * to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length equal to + * zero committed followed by a 3DSTATE_CONSTANT_* with buffer 0 read length + * not equal to zero committed. Possible ways to avoid this condition + * include: + * 1. always force buffer 3 to have a non zero read length + * 2. always force buffer 0 to a zero read length + */ + if (brw->gen >= 9 && active) { + OUT_BATCH(0); + OUT_BATCH(stage_state->push_const_size); + } else { + OUT_BATCH(active ? stage_state->push_const_size : 0); + OUT_BATCH(0); + } /* Pointer to the constant buffer. Covered by the set of state flags * from gen6_prepare_wm_contants */ - OUT_BATCH(active ? 
(stage_state->push_const_offset | mocs) : 0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - if (brw->gen >= 8) { + if (brw->gen >= 9 && active) { OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); + /* XXX: When using buffers other than 0, you need to specify the + * graphics virtual address regardless of INSPM/debug bits + */ + OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0, + stage_state->push_const_offset); + OUT_BATCH(0); + OUT_BATCH(0); + } else if (brw->gen>= 8) { + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } else { + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); } ADVANCE_BATCH(); From 38eb9015e3be9b93248e64b6befce16872107a7c Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 15 May 2015 09:54:23 -0700 Subject: [PATCH 782/834] i965: Split VUE map handling out of brw_vs.c into brw_vue_map.c. This was originally only used by the vertex shader, but it's now used by the geometry shader as well, and will also eventually be used for tessellation control and evaluation shaders. I suspect it will be easier to find in a file named after the concept. 
Signed-off-by: Kenneth Graunke Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_vs.c | 102 ---------------- src/mesa/drivers/dri/i965/brw_vue_map.c | 136 +++++++++++++++++++++ 3 files changed, 137 insertions(+), 102 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_vue_map.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 93f336e4b72..981fe79b132 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -130,6 +130,7 @@ i965_FILES = \ brw_vs.h \ brw_vs_state.c \ brw_vs_surface_state.c \ + brw_vue_map.c \ brw_wm.c \ brw_wm.h \ brw_wm_iz.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index d03567e33b8..6e9848fb1e9 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -40,108 +40,6 @@ #include "util/ralloc.h" -static inline void assign_vue_slot(struct brw_vue_map *vue_map, - int varying) -{ - /* Make sure this varying hasn't been assigned a slot already */ - assert (vue_map->varying_to_slot[varying] == -1); - - vue_map->varying_to_slot[varying] = vue_map->num_slots; - vue_map->slot_to_varying[vue_map->num_slots++] = varying; -} - -/** - * Compute the VUE map for vertex shader program. - */ -void -brw_compute_vue_map(const struct brw_device_info *devinfo, - struct brw_vue_map *vue_map, - GLbitfield64 slots_valid) -{ - vue_map->slots_valid = slots_valid; - int i; - - /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they - * are stored in the first VUE slot (VARYING_SLOT_PSIZ). - */ - slots_valid &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT); - - /* Make sure that the values we store in vue_map->varying_to_slot and - * vue_map->slot_to_varying won't overflow the signed chars that are used - * to store them. 
Note that since vue_map->slot_to_varying sometimes holds - * values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that - * BRW_VARYING_SLOT_COUNT is <= 127, not 128. - */ - STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127); - - vue_map->num_slots = 0; - for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) { - vue_map->varying_to_slot[i] = -1; - vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT; - } - - /* VUE header: format depends on chip generation and whether clipping is - * enabled. - */ - if (devinfo->gen < 6) { - /* There are 8 dwords in VUE header pre-Ironlake: - * dword 0-3 is indices, point width, clip flags. - * dword 4-7 is ndc position - * dword 8-11 is the first vertex data. - * - * On Ironlake the VUE header is nominally 20 dwords, but the hardware - * will accept the same header layout as Gen4 [and should be a bit faster] - */ - assign_vue_slot(vue_map, VARYING_SLOT_PSIZ); - assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC); - assign_vue_slot(vue_map, VARYING_SLOT_POS); - } else { - /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: - * dword 0-3 of the header is indices, point width, clip flags. - * dword 4-7 is the 4D space position - * dword 8-15 of the vertex header is the user clip distance if - * enabled. - * dword 8-11 or 16-19 is the first vertex element data we fill. - */ - assign_vue_slot(vue_map, VARYING_SLOT_PSIZ); - assign_vue_slot(vue_map, VARYING_SLOT_POS); - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)) - assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0); - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1)) - assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1); - - /* front and back colors need to be consecutive so that we can use - * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing - * two-sided color. 
- */ - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0)) - assign_vue_slot(vue_map, VARYING_SLOT_COL0); - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0)) - assign_vue_slot(vue_map, VARYING_SLOT_BFC0); - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1)) - assign_vue_slot(vue_map, VARYING_SLOT_COL1); - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1)) - assign_vue_slot(vue_map, VARYING_SLOT_BFC1); - } - - /* The hardware doesn't care about the rest of the vertex outputs, so just - * assign them contiguously. Don't reassign outputs that already have a - * slot. - * - * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX, - * since it's encoded as the clip distances by emit_clip_distances(). - * However, it may be output by transform feedback, and we'd rather not - * recompute state when TF changes, so we just always include it. - */ - for (int i = 0; i < VARYING_SLOT_MAX; ++i) { - if ((slots_valid & BITFIELD64_BIT(i)) && - vue_map->varying_to_slot[i] == -1) { - assign_vue_slot(vue_map, i); - } - } -} - - /** * Decide which set of clip planes should be used when clipping via * gl_Position or gl_ClipVertex. 
diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c new file mode 100644 index 00000000000..ff92bd266a4 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vue_map.c @@ -0,0 +1,136 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file brw_vue_map.c + * + * Authors: + * Paul Berry + * Chris Forbes + * Eric Anholt + */ + + +#include "main/compiler.h" +#include "brw_context.h" + +static inline void +assign_vue_slot(struct brw_vue_map *vue_map, int varying) +{ + /* Make sure this varying hasn't been assigned a slot already */ + assert (vue_map->varying_to_slot[varying] == -1); + + vue_map->varying_to_slot[varying] = vue_map->num_slots; + vue_map->slot_to_varying[vue_map->num_slots++] = varying; +} + +/** + * Compute the VUE map for vertex shader program. 
+ */ +void +brw_compute_vue_map(const struct brw_device_info *devinfo, + struct brw_vue_map *vue_map, + GLbitfield64 slots_valid) +{ + vue_map->slots_valid = slots_valid; + int i; + + /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they + * are stored in the first VUE slot (VARYING_SLOT_PSIZ). + */ + slots_valid &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT); + + /* Make sure that the values we store in vue_map->varying_to_slot and + * vue_map->slot_to_varying won't overflow the signed chars that are used + * to store them. Note that since vue_map->slot_to_varying sometimes holds + * values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that + * BRW_VARYING_SLOT_COUNT is <= 127, not 128. + */ + STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127); + + vue_map->num_slots = 0; + for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) { + vue_map->varying_to_slot[i] = -1; + vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT; + } + + /* VUE header: format depends on chip generation and whether clipping is + * enabled. + */ + if (devinfo->gen < 6) { + /* There are 8 dwords in VUE header pre-Ironlake: + * dword 0-3 is indices, point width, clip flags. + * dword 4-7 is ndc position + * dword 8-11 is the first vertex data. + * + * On Ironlake the VUE header is nominally 20 dwords, but the hardware + * will accept the same header layout as Gen4 [and should be a bit faster] + */ + assign_vue_slot(vue_map, VARYING_SLOT_PSIZ); + assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC); + assign_vue_slot(vue_map, VARYING_SLOT_POS); + } else { + /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: + * dword 0-3 of the header is indices, point width, clip flags. + * dword 4-7 is the 4D space position + * dword 8-15 of the vertex header is the user clip distance if + * enabled. + * dword 8-11 or 16-19 is the first vertex element data we fill. 
+ */ + assign_vue_slot(vue_map, VARYING_SLOT_PSIZ); + assign_vue_slot(vue_map, VARYING_SLOT_POS); + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)) + assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0); + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1)) + assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1); + + /* front and back colors need to be consecutive so that we can use + * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing + * two-sided color. + */ + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0)) + assign_vue_slot(vue_map, VARYING_SLOT_COL0); + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0)) + assign_vue_slot(vue_map, VARYING_SLOT_BFC0); + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1)) + assign_vue_slot(vue_map, VARYING_SLOT_COL1); + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1)) + assign_vue_slot(vue_map, VARYING_SLOT_BFC1); + } + + /* The hardware doesn't care about the rest of the vertex outputs, so just + * assign them contiguously. Don't reassign outputs that already have a + * slot. + * + * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX, + * since it's encoded as the clip distances by emit_clip_distances(). + * However, it may be output by transform feedback, and we'd rather not + * recompute state when TF changes, so we just always include it. + */ + for (int i = 0; i < VARYING_SLOT_MAX; ++i) { + if ((slots_valid & BITFIELD64_BIT(i)) && + vue_map->varying_to_slot[i] == -1) { + assign_vue_slot(vue_map, i); + } + } +} From 94e3864707e48d4b1d5fb5f88a01370a73ddb0cb Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 15 May 2015 09:58:42 -0700 Subject: [PATCH 783/834] i965: Add and fix comments in brw_vue_map.c. 
Signed-off-by: Kenneth Graunke Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_vue_map.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c index ff92bd266a4..76875789ba8 100644 --- a/src/mesa/drivers/dri/i965/brw_vue_map.c +++ b/src/mesa/drivers/dri/i965/brw_vue_map.c @@ -24,6 +24,15 @@ /** * @file brw_vue_map.c * + * This file computes the "VUE map" for a (non-fragment) shader stage, which + * describes the layout of its output varyings. The VUE map is used to match + * outputs from one stage with the inputs of the next. + * + * Largely, varyings can be placed however we like - producers/consumers simply + * have to agree on the layout. However, there is also a "VUE Header" that + * prescribes a fixed-layout for items that interact with fixed function + * hardware, such as the clipper and rasterizer. + * * Authors: * Paul Berry * Chris Forbes @@ -45,7 +54,7 @@ assign_vue_slot(struct brw_vue_map *vue_map, int varying) } /** - * Compute the VUE map for vertex shader program. + * Compute the VUE map for a shader stage. */ void brw_compute_vue_map(const struct brw_device_info *devinfo, @@ -76,6 +85,9 @@ brw_compute_vue_map(const struct brw_device_info *devinfo, /* VUE header: format depends on chip generation and whether clipping is * enabled. + * + * See the Sandybridge PRM, Volume 2 Part 1, section 1.5.1 (page 30), + * "Vertex URB Entry (VUE) Formats" which describes the VUE header layout. */ if (devinfo->gen < 6) { /* There are 8 dwords in VUE header pre-Ironlake: From 1762568fd39b9be42d963d335e36daea25df7044 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 10 Jun 2015 00:52:07 -0700 Subject: [PATCH 784/834] nir: Allow vec2/vec3/vec4 instructions in the select peephole pass. These are basically just moves, so they should be safe as well. 
When disabling i965's GLSL IR level scalarizer (channel expressions) pass, I started seeing NIR code like this: if ssa_21 { block block_1: /* preds: block_0 */ vec4 ssa_120 = vec4 ssa_82, ssa_83, ssa_84, ssa_30 /* succs: block_3 */ } else { block block_2: /* preds: block_0 */ /* succs: block_3 */ } block block_3: /* preds: block_1 block_2 */ vec4 ssa_33 = phi block_1: ssa_120, block_2: ssa_2 Previously, the GLSL IR scalarizer pass would break the vec4 into a series of fmovs, which were allowed by the peephole pass. But with the vec4 operation, they were not. We want to keep getting selects. Normal i965 on Broadwell: instructions in affected programs: 200 -> 176 (-12.00%) helped: 4 With brw_fs_channel_expressions() disabled: instructions in affected programs: 1832 -> 1646 (-10.15%) helped: 30 Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Connor Abbott --- src/glsl/nir/nir_opt_peephole_select.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/glsl/nir/nir_opt_peephole_select.c b/src/glsl/nir/nir_opt_peephole_select.c index 82c65bb442f..ef7c9775aa3 100644 --- a/src/glsl/nir/nir_opt_peephole_select.c +++ b/src/glsl/nir/nir_opt_peephole_select.c @@ -86,7 +86,9 @@ block_check_for_allowed_instrs(nir_block *block) nir_alu_instr *mov = nir_instr_as_alu(instr); if (mov->op != nir_op_fmov && mov->op != nir_op_imov && mov->op != nir_op_fneg && mov->op != nir_op_ineg && - mov->op != nir_op_fabs && mov->op != nir_op_iabs) + mov->op != nir_op_fabs && mov->op != nir_op_iabs && + mov->op != nir_op_vec2 && mov->op != nir_op_vec3 && + mov->op != nir_op_vec4) return false; /* Can't handle saturate */ From 24e77cb09fda9a57d4a8288ced3e01df4c8ac280 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 27 May 2015 18:37:17 +1000 Subject: [PATCH 785/834] tgsi: handle indirect sampler arrays. (v2) This is required for ARB_gpu_shader5 support in softpipe. v2: add support to txd/txf/txq paths. 
Reviewed-by: Roland Scheidegger Signed-off-by: Dave Airlie --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 42 +++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index fde99b9e494..44000ffdb6c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1988,6 +1988,35 @@ fetch_assign_deriv_channel(struct tgsi_exec_machine *mach, derivs[1][3] = d.f[3]; } +static uint +fetch_sampler_unit(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + uint sampler) +{ + uint unit; + + if (inst->Src[sampler].Register.Indirect) { + const struct tgsi_full_src_register *reg = &inst->Src[sampler]; + union tgsi_exec_channel indir_index, index2; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->Indirect.Index; + + fetch_src_file_channel(mach, + 0, + reg->Indirect.File, + reg->Indirect.Swizzle, + &index2, + &ZeroVec, + &indir_index); + unit = inst->Src[sampler].Register.Index + indir_index.i[0]; + } else { + unit = inst->Src[sampler].Register.Index; + } + return unit; +} /* * execute a texture instruction. 
@@ -2001,14 +2030,15 @@ exec_tex(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, uint modifier, uint sampler) { - const uint unit = inst->Src[sampler].Register.Index; const union tgsi_exec_channel *args[5], *proj = NULL; union tgsi_exec_channel r[5]; enum tgsi_sampler_control control = tgsi_sampler_lod_none; uint chan; + uint unit; int8_t offsets[3]; int dim, shadow_ref, i; + unit = fetch_sampler_unit(mach, inst, sampler); /* always fetch all 3 offsets, overkill but keeps code simple */ fetch_texel_offsets(mach, inst, offsets); @@ -2107,12 +2137,13 @@ static void exec_txd(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[3].Register.Index; union tgsi_exec_channel r[4]; float derivs[3][2][TGSI_QUAD_SIZE]; uint chan; + uint unit; int8_t offsets[3]; + unit = fetch_sampler_unit(mach, inst, 3); /* always fetch all 3 offsets, overkill but keeps code simple */ fetch_texel_offsets(mach, inst, offsets); @@ -2214,14 +2245,15 @@ static void exec_txf(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[1].Register.Index; union tgsi_exec_channel r[4]; uint chan; + uint unit; float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; int j; int8_t offsets[3]; unsigned target; + unit = fetch_sampler_unit(mach, inst, 1); /* always fetch all 3 offsets, overkill but keeps code simple */ fetch_texel_offsets(mach, inst, offsets); @@ -2296,12 +2328,14 @@ static void exec_txq(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[1].Register.Index; int result[4]; union tgsi_exec_channel r[4], src; uint chan; + uint unit; int i,j; + unit = fetch_sampler_unit(mach, inst, 1); + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); /* XXX: This interface can't return per-pixel values */ From 40d225803ecfa805b4dea4ee0ebd04df00ca8827 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 20 May 2015 
10:32:32 +1000 Subject: [PATCH 786/834] draw/tgsi: implement geom shader invocation support. This is just for softpipe, llvmpipe won't work without some changes. Reviewed-by: Roland Scheidegger Signed-off-by: Dave Airlie --- src/gallium/auxiliary/draw/draw_gs.c | 47 ++++++++++++++++---------- src/gallium/auxiliary/draw/draw_gs.h | 2 ++ src/gallium/auxiliary/tgsi/tgsi_scan.c | 2 ++ src/gallium/auxiliary/tgsi/tgsi_scan.h | 1 + 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 6375d41295c..755e52793a8 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -190,9 +190,15 @@ static void tgsi_gs_prepare(struct draw_geometry_shader *shader, const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) { struct tgsi_exec_machine *machine = shader->machine; - + int j; tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, constants, constants_size); + + if (shader->info.uses_invocationid) { + unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INVOCATIONID]; + for (j = 0; j < TGSI_QUAD_SIZE; j++) + machine->SystemValue[i].i[j] = shader->invocation_id; + } } static unsigned tgsi_gs_run(struct draw_geometry_shader *shader, @@ -555,7 +561,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, * overflown vertices into some area where they won't harm anyone */ unsigned total_verts_per_buffer = shader->primitive_boundary * num_in_primitives; - + unsigned invocation; //Assume at least one primitive max_out_prims = MAX2(max_out_prims, 1); @@ -564,7 +570,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, output_verts->stride = output_verts->vertex_size; output_verts->verts = (struct vertex_header *)MALLOC(output_verts->vertex_size * - total_verts_per_buffer); + total_verts_per_buffer * shader->num_invocations); debug_assert(output_verts->verts); #if 0 @@ -592,7 +598,7 @@ int 
draw_geometry_shader_run(struct draw_geometry_shader *shader, shader->input = input; shader->input_info = input_info; FREE(shader->primitive_lengths); - shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); + shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned) * shader->num_invocations); #ifdef HAVE_LLVM @@ -622,24 +628,27 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, } #endif - shader->prepare(shader, constants, constants_size); + for (invocation = 0; invocation < shader->num_invocations; invocation++) { + shader->invocation_id = invocation; - if (input_prim->linear) - gs_run(shader, input_prim, input_verts, - output_prims, output_verts); - else - gs_run_elts(shader, input_prim, input_verts, - output_prims, output_verts); + shader->prepare(shader, constants, constants_size); - /* Flush the remaining primitives. Will happen if - * num_input_primitives % 4 != 0 - */ - if (shader->fetched_prim_count > 0) { - gs_flush(shader); + if (input_prim->linear) + gs_run(shader, input_prim, input_verts, + output_prims, output_verts); + else + gs_run_elts(shader, input_prim, input_verts, + output_prims, output_verts); + + /* Flush the remaining primitives. 
Will happen if + * num_input_primitives % 4 != 0 + */ + if (shader->fetched_prim_count > 0) { + gs_flush(shader); + } + debug_assert(shader->fetched_prim_count == 0); } - debug_assert(shader->fetched_prim_count == 0); - /* Update prim_info: */ output_prims->linear = TRUE; @@ -771,6 +780,8 @@ draw_create_geometry_shader(struct draw_context *draw, gs->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; gs->max_output_vertices = gs->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; + gs->num_invocations = + gs->info.properties[TGSI_PROPERTY_GS_INVOCATIONS]; if (!gs->max_output_vertices) gs->max_output_vertices = 32; diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 49e93d55a51..663ba847cfe 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -90,6 +90,8 @@ struct draw_geometry_shader { unsigned vector_length; unsigned max_out_prims; + unsigned num_invocations; + unsigned invocation_id; #ifdef HAVE_LLVM struct draw_gs_inputs *gs_input; struct draw_gs_jit_context *jit_context; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 369f56a1955..711413cdaf6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -248,6 +248,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } else if (semName == TGSI_SEMANTIC_PRIMID) { info->uses_primid = TRUE; + } else if (semName == TGSI_SEMANTIC_INVOCATIONID) { + info->uses_invocationid = TRUE; } } else if (file == TGSI_FILE_OUTPUT) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index af4b128fcaf..b81bdd71f14 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -89,6 +89,7 @@ struct tgsi_shader_info boolean uses_basevertex; boolean uses_primid; boolean uses_frontface; + boolean uses_invocationid; boolean writes_psize; boolean writes_clipvertex; boolean 
writes_viewport_index; From 1a71fbe28ca0525b618f6fb9d7354f3a6589af2f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 22 Jun 2015 13:59:25 +1000 Subject: [PATCH 787/834] draw/gallivm: add invocation ID support for llvmpipe. This extends the draw code to add support for invocations. Reviewed-by: Roland Scheidegger Signed-off-by: Dave Airlie --- src/gallium/auxiliary/draw/draw_gs.c | 3 ++- src/gallium/auxiliary/draw/draw_llvm.c | 5 ++++- src/gallium/auxiliary/draw/draw_llvm.h | 3 ++- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 1 + src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 5 +++++ 5 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 755e52793a8..a1564f93292 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -391,7 +391,8 @@ llvm_gs_run(struct draw_geometry_shader *shader, (struct vertex_header*)input, input_primitives, shader->draw->instance_id, - shader->llvm_prim_ids); + shader->llvm_prim_ids, + shader->invocation_id); return ret; } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 9629a8a3e42..90a31bc6ac0 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -2069,7 +2069,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm, struct gallivm_state *gallivm = variant->gallivm; LLVMContextRef context = gallivm->context; LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); - LLVMTypeRef arg_types[6]; + LLVMTypeRef arg_types[7]; LLVMTypeRef func_type; LLVMValueRef variant_func; LLVMValueRef context_ptr; @@ -2105,6 +2105,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm, arg_types[4] = int32_type; /* instance_id */ arg_types[5] = LLVMPointerType( LLVMVectorType(int32_type, vector_length), 0); /* prim_id_ptr */ + arg_types[6] = int32_type; func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0); @@ -2125,6 
+2126,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm, num_prims = LLVMGetParam(variant_func, 3); system_values.instance_id = LLVMGetParam(variant_func, 4); prim_id_ptr = LLVMGetParam(variant_func, 5); + system_values.invocation_id = LLVMGetParam(variant_func, 6); lp_build_name(context_ptr, "context"); lp_build_name(input_array, "input"); @@ -2132,6 +2134,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm, lp_build_name(num_prims, "num_prims"); lp_build_name(system_values.instance_id, "instance_id"); lp_build_name(prim_id_ptr, "prim_id_ptr"); + lp_build_name(system_values.invocation_id, "invocation_id"); variant->context_ptr = context_ptr; variant->io_ptr = io_ptr; diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 9565fc68af9..d48ed721593 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -298,7 +298,8 @@ typedef int struct vertex_header *output, unsigned num_prims, unsigned instance_id, - int *prim_ids); + int *prim_ids, + unsigned invocation_id); struct draw_llvm_variant_key { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 3f76b79b8d1..967373ccdae 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -165,6 +165,7 @@ struct lp_bld_tgsi_system_values { LLVMValueRef vertex_id_nobase; LLVMValueRef prim_id; LLVMValueRef basevertex; + LLVMValueRef invocation_id; }; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 092bd18b361..268379e7d13 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1532,6 +1532,11 @@ emit_fetch_system_value( atype = TGSI_TYPE_UNSIGNED; break; + case TGSI_SEMANTIC_INVOCATIONID: + res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id); + atype = TGSI_TYPE_UNSIGNED; + 
break; + default: assert(!"unexpected semantic in emit_fetch_system_value"); res = bld_base->base.zero; From 4731be701f3094666c24c143d9d6ddf53c456c39 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 23 Jun 2015 15:55:30 +1000 Subject: [PATCH 788/834] docs: update GL3 with softpipe/llvmpipe gpu_shader5 pieces. This just updates the bits I've added in the previous few patches. Signed-off-by: Dave Airlie --- docs/GL3.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index a40fdee487a..220bcc8742f 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -98,13 +98,13 @@ GL 4.0, GLSL 4.00: GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_gpu_shader5 DONE (i965, nvc0) - 'precise' qualifier DONE - - Dynamically uniform sampler array indices DONE (r600) + - Dynamically uniform sampler array indices DONE (r600, softpipe) - Dynamically uniform UBO array indices DONE (r600) - Implicit signed -> unsigned conversions DONE - Fused multiply-add DONE () - Packing/bitfield/conversion functions DONE (r600, radeonsi, softpipe) - Enhanced textureGather DONE (r600, radeonsi, softpipe) - - Geometry shader instancing DONE (r600) + - Geometry shader instancing DONE (r600, llvmpipe, softpipe) - Geometry shader multiple streams DONE () - Enhanced per-sample shading DONE (r600, radeonsi) - Interpolation functions DONE (r600) From 634cfb9a458bcc1051b60ab13bd12e17bba0f71b Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Fri, 19 Jun 2015 13:53:46 +0100 Subject: [PATCH 789/834] glsl: Specify the shader stage in linker errors due to too many in/outputs. 
Reviewed-by: Ian Romanick Reviewed-by: Brian Paul --- src/glsl/link_varyings.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index 7b2d4bd2394..278a778797b 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -1540,13 +1540,15 @@ check_against_output_limit(struct gl_context *ctx, const unsigned output_components = output_vectors * 4; if (output_components > max_output_components) { if (ctx->API == API_OPENGLES2 || prog->IsES) - linker_error(prog, "shader uses too many output vectors " + linker_error(prog, "%s shader uses too many output vectors " "(%u > %u)\n", + _mesa_shader_stage_to_string(producer->Stage), output_vectors, max_output_components / 4); else - linker_error(prog, "shader uses too many output components " + linker_error(prog, "%s shader uses too many output components " "(%u > %u)\n", + _mesa_shader_stage_to_string(producer->Stage), output_components, max_output_components); @@ -1579,13 +1581,15 @@ check_against_input_limit(struct gl_context *ctx, const unsigned input_components = input_vectors * 4; if (input_components > max_input_components) { if (ctx->API == API_OPENGLES2 || prog->IsES) - linker_error(prog, "shader uses too many input vectors " + linker_error(prog, "%s shader uses too many input vectors " "(%u > %u)\n", + _mesa_shader_stage_to_string(consumer->Stage), input_vectors, max_input_components / 4); else - linker_error(prog, "shader uses too many input components " + linker_error(prog, "%s shader uses too many input components " "(%u > %u)\n", + _mesa_shader_stage_to_string(consumer->Stage), input_components, max_input_components); From be5f71d4a52c9ef72f63bb6c339fe0110f2027af Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Tue, 23 Jun 2015 12:18:06 +0100 Subject: [PATCH 790/834] draw,tgsi: Assume TGSI_PROPERTY_GS_INVOCATIONS default of 1. If the shader doesn't specify number of invocations, assume one. 
This fixes geometry shaders on state trackers other than Mesa (and probably graw tests too.) Trivial. --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 711413cdaf6..7523baf4ce0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -62,6 +62,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_max[i] = -1; for (i = 0; i < Elements(info->const_file_max); i++) info->const_file_max[i] = -1; + info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = 1; /** ** Setup to begin parsing input shader From aaac913e901229d11a1894f6aaf646de6b1a542c Mon Sep 17 00:00:00 2001 From: Derek Foreman Date: Wed, 17 Jun 2015 11:28:48 -0500 Subject: [PATCH 791/834] egl/drm: Duplicate fd with F_DUPFD_CLOEXEC to prevent leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replacing dup() with fcntl F_DUPFD_CLOEXEC creates the duplicate file descriptor with CLOEXEC so it won't be leaked to child processes if the process fork()s later. 
Signed-off-by: Derek Foreman Reviewed-by: Kristian Høgsberg Reviewed-by: Emil Velikov --- src/egl/drivers/dri2/platform_drm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c index 3391afc635c..c97c54fb58b 100644 --- a/src/egl/drivers/dri2/platform_drm.c +++ b/src/egl/drivers/dri2/platform_drm.c @@ -632,7 +632,7 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) } if (fd < 0) { - fd = dup(gbm_device_get_fd(gbm)); + fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3); if (fd < 0) { free(dri2_dpy); return EGL_FALSE; From 9c927463492dea14d82ebdd77f711f86b0e6fc5e Mon Sep 17 00:00:00 2001 From: Derek Foreman Date: Wed, 17 Jun 2015 11:28:49 -0500 Subject: [PATCH 792/834] loader: Rename drm_open_device() to loader_open_device() and share it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is already our common idiom for opening files with CLOEXEC and it's a little ugly, so let's share this one implementation. 
Signed-off-by: Derek Foreman Reviewed-by: Kristian Høgsberg Reviewed-by: Emil Velikov --- src/loader/loader.c | 6 +++--- src/loader/loader.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/loader/loader.c b/src/loader/loader.c index 17bf1336005..fc468153425 100644 --- a/src/loader/loader.c +++ b/src/loader/loader.c @@ -314,8 +314,8 @@ get_id_path_tag_from_fd(struct udev *udev, int fd) return id_path_tag; } -static int -drm_open_device(const char *device_name) +int +loader_open_device(const char *device_name) { int fd; #ifdef O_CLOEXEC @@ -404,7 +404,7 @@ int loader_get_user_preferred_fd(int default_fd, int *different_device) goto default_device_clean; } - fd = drm_open_device(device_name); + fd = loader_open_device(device_name); if (fd >= 0) { close(default_fd); } else { diff --git a/src/loader/loader.h b/src/loader/loader.h index 60c58f2f8fc..055dc786892 100644 --- a/src/loader/loader.h +++ b/src/loader/loader.h @@ -36,6 +36,9 @@ extern "C" { #define _LOADER_DRI (1 << 0) #define _LOADER_GALLIUM (1 << 1) +int +loader_open_device(const char *); + int loader_get_pci_id_for_fd(int fd, int *vendor_id, int *chip_id); From 324ee9b391ea2db4b74709d30a131e79055bf071 Mon Sep 17 00:00:00 2001 From: Derek Foreman Date: Wed, 17 Jun 2015 11:28:50 -0500 Subject: [PATCH 793/834] glx: Use loader_open_device() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've moved the open with CLOEXEC idiom into a helper function, so call it instead of duplicating the code here. 
Signed-off-by: Derek Foreman Reviewed-by: Kristian Høgsberg Reviewed-by: Emil Velikov --- src/glx/dri2_glx.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c index 538cf1adb82..27ea9521e50 100644 --- a/src/glx/dri2_glx.c +++ b/src/glx/dri2_glx.c @@ -1183,15 +1183,7 @@ dri2CreateScreen(int screen, struct glx_display * priv) return NULL; } -#ifdef O_CLOEXEC - psc->fd = open(deviceName, O_RDWR | O_CLOEXEC); - if (psc->fd == -1 && errno == EINVAL) -#endif - { - psc->fd = open(deviceName, O_RDWR); - if (psc->fd != -1) - fcntl(psc->fd, F_SETFD, fcntl(psc->fd, F_GETFD) | FD_CLOEXEC); - } + psc->fd = loader_open_device(deviceName); if (psc->fd < 0) { ErrorMessageF("failed to open drm device: %s\n", strerror(errno)); goto handle_error; From 4f8f790525f1adcb5259cb72b7c9dbfd121867c6 Mon Sep 17 00:00:00 2001 From: Derek Foreman Date: Wed, 17 Jun 2015 11:28:51 -0500 Subject: [PATCH 794/834] egl: Use the loader_open_device() helper to do open with CLOEXEC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've moved the open with CLOEXEC idiom into a helper function, so call it instead of duplicating the code. This also replaces a couple of opens that didn't properly do CLOEXEC. 
Signed-off-by: Derek Foreman Reviewed-by: Kristian Høgsberg Reviewed-by: Emil Velikov --- src/egl/drivers/dri2/platform_drm.c | 4 ++-- src/egl/drivers/dri2/platform_surfaceless.c | 11 +---------- src/egl/drivers/dri2/platform_wayland.c | 11 +---------- src/egl/drivers/dri2/platform_x11.c | 12 ++---------- 4 files changed, 6 insertions(+), 32 deletions(-) diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c index c97c54fb58b..a62da4121fe 100644 --- a/src/egl/drivers/dri2/platform_drm.c +++ b/src/egl/drivers/dri2/platform_drm.c @@ -611,9 +611,9 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) char buf[64]; int n = snprintf(buf, sizeof(buf), DRM_DEV_NAME, DRM_DIR_NAME, 0); if (n != -1 && n < sizeof(buf)) - fd = open(buf, O_RDWR); + fd = loader_open_device(buf); if (fd < 0) - fd = open("/dev/dri/card0", O_RDWR); + fd = loader_open_device("/dev/dri/card0"); dri2_dpy->own_device = 1; gbm = gbm_create_device(fd); if (gbm == NULL) diff --git a/src/egl/drivers/dri2/platform_surfaceless.c b/src/egl/drivers/dri2/platform_surfaceless.c index 30cea368554..48f15df75a1 100644 --- a/src/egl/drivers/dri2/platform_surfaceless.c +++ b/src/egl/drivers/dri2/platform_surfaceless.c @@ -97,16 +97,7 @@ dri2_initialize_surfaceless(_EGLDriver *drv, _EGLDisplay *disp) if (asprintf(&card_path, DRM_RENDER_DEV_NAME, DRM_DIR_NAME, base + i) < 0) continue; -#ifdef O_CLOEXEC - dri2_dpy->fd = open(card_path, O_RDWR | O_CLOEXEC); - if (dri2_dpy->fd < 0 && errno == EINVAL) -#endif - { - dri2_dpy->fd = open(card_path, O_RDWR); - if (dri2_dpy->fd >= 0) - fcntl(dri2_dpy->fd, F_SETFD, fcntl(dri2_dpy->fd, F_GETFD) | - FD_CLOEXEC); - } + dri2_dpy->fd = loader_open_device(card_path); free(card_path); if (dri2_dpy->fd < 0) diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index ea2f9f23b96..1c985523862 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -891,16 +891,7 @@ 
drm_handle_device(void *data, struct wl_drm *drm, const char *device) if (!dri2_dpy->device_name) return; -#ifdef O_CLOEXEC - dri2_dpy->fd = open(dri2_dpy->device_name, O_RDWR | O_CLOEXEC); - if (dri2_dpy->fd == -1 && errno == EINVAL) -#endif - { - dri2_dpy->fd = open(dri2_dpy->device_name, O_RDWR); - if (dri2_dpy->fd != -1) - fcntl(dri2_dpy->fd, F_SETFD, fcntl(dri2_dpy->fd, F_GETFD) | - FD_CLOEXEC); - } + dri2_dpy->fd = loader_open_device(dri2_dpy->device_name); if (dri2_dpy->fd == -1) { _eglLog(_EGL_WARNING, "wayland-egl: could not open %s (%s)", dri2_dpy->device_name, strerror(errno)); diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index 2d5b71746e8..8a34bb19bd5 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -43,6 +43,7 @@ #include "egl_dri2.h" #include "egl_dri2_fallbacks.h" +#include "loader.h" static EGLBoolean dri2_x11_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, @@ -1230,16 +1231,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp) if (!dri2_load_driver(disp)) goto cleanup_conn; -#ifdef O_CLOEXEC - dri2_dpy->fd = open(dri2_dpy->device_name, O_RDWR | O_CLOEXEC); - if (dri2_dpy->fd == -1 && errno == EINVAL) -#endif - { - dri2_dpy->fd = open(dri2_dpy->device_name, O_RDWR); - if (dri2_dpy->fd != -1) - fcntl(dri2_dpy->fd, F_SETFD, fcntl(dri2_dpy->fd, F_GETFD) | - FD_CLOEXEC); - } + dri2_dpy->fd = loader_open_device(dri2_dpy->device_name); if (dri2_dpy->fd == -1) { _eglLog(_EGL_WARNING, "DRI2: could not open %s (%s)", dri2_dpy->device_name, From ec16bb62acfdfe6023d1ba6456ae8a19f14c5d80 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 18 Jun 2015 23:28:05 +0100 Subject: [PATCH 795/834] glapi: gl_table.py: remove unused variable 'es' None of the three build systems ever set it, as such we can clear things up a bit. 
Signed-off-by: Emil Velikov Reviewed-by: Dylan Baker Reviewed-by: Jose Fonseca --- src/mapi/glapi/gen/gl_table.py | 57 +++++----------------------------- 1 file changed, 8 insertions(+), 49 deletions(-) diff --git a/src/mapi/glapi/gen/gl_table.py b/src/mapi/glapi/gen/gl_table.py index 3f029023087..e25971a698c 100644 --- a/src/mapi/glapi/gen/gl_table.py +++ b/src/mapi/glapi/gen/gl_table.py @@ -33,10 +33,9 @@ import license class PrintGlTable(gl_XML.gl_print_base): - def __init__(self, es=False): + def __init__(self): gl_XML.gl_print_base.__init__(self) - self.es = es self.header_tag = '_GLAPI_TABLE_H_' self.name = "gl_table.py (from Mesa)" self.license = license.bsd_license_template % ( \ @@ -76,10 +75,9 @@ class PrintGlTable(gl_XML.gl_print_base): class PrintRemapTable(gl_XML.gl_print_base): - def __init__(self, es=False): + def __init__(self): gl_XML.gl_print_base.__init__(self) - self.es = es self.header_tag = '_DISPATCH_H_' self.name = "gl_table.py (from Mesa)" self.license = license.bsd_license_template % ( @@ -123,7 +121,6 @@ class PrintRemapTable(gl_XML.gl_print_base): functions = [] abi_functions = [] - alias_functions = [] count = 0 for f in api.functionIterateByOffset(): if not f.is_abi(): @@ -132,11 +129,6 @@ class PrintRemapTable(gl_XML.gl_print_base): else: abi_functions.append([f, -1]) - if self.es: - # remember functions with aliases - if len(f.entry_points) > 1: - alias_functions.append(f) - print '/* total number of offsets below */' print '#define _gloffset_COUNT %d' % (len(abi_functions + functions)) print '' @@ -144,18 +136,11 @@ class PrintRemapTable(gl_XML.gl_print_base): for f, index in abi_functions: print '#define _gloffset_%s %d' % (f.name, f.offset) - if self.es: - remap_table = "esLocalRemapTable" + remap_table = "driDispatchRemapTable" - print '#define %s_size %u' % (remap_table, count) - print 'static int %s[ %s_size ];' % (remap_table, remap_table) - print '' - else: - remap_table = "driDispatchRemapTable" - - print '#define %s_size 
%u' % (remap_table, count) - print 'extern int %s[ %s_size ];' % (remap_table, remap_table) - print '' + print '#define %s_size %u' % (remap_table, count) + print 'extern int %s[ %s_size ];' % (remap_table, remap_table) + print '' for f, index in functions: print '#define %s_remap_index %u' % (f.name, index) @@ -182,23 +167,6 @@ class PrintRemapTable(gl_XML.gl_print_base): print '}' print - if alias_functions: - print '' - print '/* define aliases for compatibility */' - for f in alias_functions: - for name in f.entry_points: - if name != f.name: - print '#define CALL_%s(disp, parameters) CALL_%s(disp, parameters)' % (name, f.name) - print '#define GET_%s(disp) GET_%s(disp)' % (name, f.name) - print '#define SET_%s(disp, fn) SET_%s(disp, fn)' % (name, f.name) - print '' - - for f in alias_functions: - for name in f.entry_points: - if name != f.name: - print '#define %s_remap_index %s_remap_index' % (name, f.name) - print '' - return @@ -215,12 +183,6 @@ def _parser(): default='table', metavar="mode", help="Generate either a table or a remap_table") - parser.add_argument('-c', '--es-version', - choices=[None, 'es1', 'es2'], - default=None, - metavar="ver", - dest='es', - help="filter functions for es") return parser.parse_args() @@ -231,12 +193,9 @@ def main(): api = gl_XML.parse_GL_API(args.file_name) if args.mode == "table": - printer = PrintGlTable(args.es) + printer = PrintGlTable() elif args.mode == "remap_table": - printer = PrintRemapTable(args.es) - - if args.es is not None: - api.filter_functions_by_api(args.es) + printer = PrintRemapTable() printer.Print(api) From 06109db47b69867da0c7537f97b1aa8650598a08 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 18 Jun 2015 23:30:34 +0100 Subject: [PATCH 796/834] glapi: remap_helper.py: remove unused argument 'es' Identical to the previous commit - the argument is not used by any of the Autotools, Android or SCons builds.
Signed-off-by: Emil Velikov Reviewed-by: Dylan Baker Reviewed-by: Jose Fonseca --- src/mapi/glapi/gen/remap_helper.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/mapi/glapi/gen/remap_helper.py b/src/mapi/glapi/gen/remap_helper.py index 94ae1936d21..edc6c3e14b6 100644 --- a/src/mapi/glapi/gen/remap_helper.py +++ b/src/mapi/glapi/gen/remap_helper.py @@ -174,12 +174,6 @@ def _parser(): metavar="input_file_name", dest='file_name', help="An xml description file.") - parser.add_argument('-c', '--es-version', - choices=[None, 'es1', 'es2'], - default=None, - metavar='ver', - dest='es', - help='A GLES version to support') return parser.parse_args() @@ -188,8 +182,6 @@ def main(): args = _parser() api = gl_XML.parse_GL_API(args.file_name) - if args.es is not None: - api.filter_functions_by_api(args.es) printer = PrintGlRemap() printer.Print(api) From 6d744aaf4e427b6b0b3d8d35d756592a50abbb97 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 19 Jun 2015 17:19:46 +0100 Subject: [PATCH 797/834] configure: warn about shared_glapi & xlib-glx only when both are set Printing out the message when shared_glapi is disabled only leads to confusion. Cc: "10.5 10.6" Signed-off-by: Emil Velikov Reviewed-by: Eric Anholt --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 3c763c27b9f..5161361908d 100644 --- a/configure.ac +++ b/configure.ac @@ -942,7 +942,7 @@ x*yes*yes*) esac # Building Xlib-GLX requires shared glapi to be disabled. 
-if test "x$enable_xlib_glx" = xyes; then +if test "x$enable_shared_glapi$enable_xlib_glx" = xyesyes; then AC_MSG_NOTICE([Shared GLAPI should not used with Xlib-GLX, disabling]) enable_shared_glapi=no fi From b92233f2a57ec09e9266ba4ed7f200904b784b9a Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 19 Jun 2015 17:28:25 +0100 Subject: [PATCH 798/834] drivers/x11: fix the build against shared_glapi Cc: Brian Paul Signed-off-by: Emil Velikov Reviewed-by: Eric Anholt Acked-by: Jose Fonseca --- src/mesa/drivers/x11/Makefile.am | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/mesa/drivers/x11/Makefile.am b/src/mesa/drivers/x11/Makefile.am index c0596f8119e..a8847e8a7ac 100644 --- a/src/mesa/drivers/x11/Makefile.am +++ b/src/mesa/drivers/x11/Makefile.am @@ -25,6 +25,11 @@ EXTRA_DIST = SConscript +if HAVE_SHARED_GLAPI +SHARED_GLAPI_CFLAGS = -DGLX_SHARED_GLAPI +SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la +endif + AM_CPPFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src/mapi \ @@ -34,6 +39,7 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/mesa/main \ $(X11_INCLUDES) \ + $(SHARED_GLAPI_CFLAGS) \ $(DEFINES) if HAVE_X11_DRIVER @@ -66,6 +72,7 @@ GL_PATCH = 0 lib@GL_LIB@_la_LIBADD = \ $(top_builddir)/src/mesa/libmesa.la \ $(top_builddir)/src/mapi/glapi/libglapi.la \ + $(SHARED_GLAPI_LIB) \ $(GL_LIB_DEPS) lib@GL_LIB@_la_LDFLAGS = \ From 5c37ababae6069ed73522bee35bca6228a80be77 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 19 Jun 2015 17:30:13 +0100 Subject: [PATCH 799/834] targets/libgl-xlib: fix the build against shared_glapi Cc: Brian Paul Signed-off-by: Emil Velikov Reviewed-by: Eric Anholt Acked-by: Jose Fonseca --- src/gallium/targets/libgl-xlib/Makefile.am | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/gallium/targets/libgl-xlib/Makefile.am b/src/gallium/targets/libgl-xlib/Makefile.am index 33b0d1345de..d99caae3cb0 100644 --- a/src/gallium/targets/libgl-xlib/Makefile.am +++ 
b/src/gallium/targets/libgl-xlib/Makefile.am @@ -24,6 +24,11 @@ GL_MAJOR = 1 GL_MINOR = 5 GL_TINY = $(MESA_MAJOR)$(MESA_MINOR)0$(MESA_TINY) +if HAVE_SHARED_GLAPI +SHARED_GLAPI_CFLAGS = -DGLX_SHARED_GLAPI +SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la +endif + AM_CPPFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ @@ -35,6 +40,7 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/gallium/state_trackers/glx/xlib \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/gallium/winsys \ + $(SHARED_GLAPI_CFLAGS) \ -DGALLIUM_SOFTPIPE \ -DGALLIUM_RBUG \ -DGALLIUM_TRACE @@ -65,6 +71,7 @@ lib@GL_LIB@_la_LIBADD = \ $(top_builddir)/src/mapi/glapi/libglapi.la \ $(top_builddir)/src/mesa/libmesagallium.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(SHARED_GLAPI_LIB) \ $(GL_LIB_DEPS) \ $(CLOCK_LIB) From 92dc50786224411dc0aeff18b80a1995fcb04dc1 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 19 Jun 2015 19:43:23 +0100 Subject: [PATCH 800/834] configure: allow building shared-glapi powered libgl-xlib Cc: Brian Paul Cc: Adam Jackson Signed-off-by: Emil Velikov Reviewed-by: Eric Anholt Acked-by: Jose Fonseca --- configure.ac | 6 ------ 1 file changed, 6 deletions(-) diff --git a/configure.ac b/configure.ac index 5161361908d..677fb5bbb89 100644 --- a/configure.ac +++ b/configure.ac @@ -941,12 +941,6 @@ x*yes*yes*) ;; esac -# Building Xlib-GLX requires shared glapi to be disabled. -if test "x$enable_shared_glapi$enable_xlib_glx" = xyesyes; then - AC_MSG_NOTICE([Shared GLAPI should not used with Xlib-GLX, disabling]) - enable_shared_glapi=no -fi - AM_CONDITIONAL(HAVE_SHARED_GLAPI, test "x$enable_shared_glapi" = xyes) # Build the pipe-drivers as separate libraries/modules. From 2752e629e71a8b4345b61f55b09d5ed04fc5e4b8 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 19 Jun 2015 17:37:18 +0100 Subject: [PATCH 801/834] drivers/x11: drop unneeded HAVE_X11_DRIVER check Already handled in the Makefile which includes the drivers/x11 subdir. 
Signed-off-by: Emil Velikov Reviewed-by: Eric Anholt --- src/mesa/drivers/x11/Makefile.am | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mesa/drivers/x11/Makefile.am b/src/mesa/drivers/x11/Makefile.am index a8847e8a7ac..ba79f6981b9 100644 --- a/src/mesa/drivers/x11/Makefile.am +++ b/src/mesa/drivers/x11/Makefile.am @@ -42,9 +42,7 @@ AM_CPPFLAGS = \ $(SHARED_GLAPI_CFLAGS) \ $(DEFINES) -if HAVE_X11_DRIVER lib_LTLIBRARIES = lib@GL_LIB@.la -endif lib@GL_LIB@_la_SOURCES = \ glxapi.h \ From ddc886b5bfe5976fa2e5f49eeefa918736f1aa97 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 19 Jun 2015 17:44:02 +0100 Subject: [PATCH 802/834] configure: error out when building backend-less libEGL Cc: "10.5 10.6" Signed-off-by: Emil Velikov Reviewed-by: Eric Anholt --- configure.ac | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 677fb5bbb89..8e62bd893ee 100644 --- a/configure.ac +++ b/configure.ac @@ -1542,8 +1542,12 @@ if test "x$enable_egl" = xyes; then if test "$enable_static" != yes; then if test "x$enable_dri" = xyes; then - HAVE_EGL_DRIVER_DRI2=1 - fi + HAVE_EGL_DRIVER_DRI2=1 + else + # Avoid building an "empty" libEGL. Drop/update this + # when other backends (haiku?) come along. + AC_MSG_ERROR([egl requires --enable-dri]) + fi fi fi From 994be5143a097ae2cf504ba344362edfee388ac3 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 19 Jun 2015 17:46:41 +0100 Subject: [PATCH 803/834] configure: error out when building libEGL without shared-glapi The latter is a hard requirement and without it we'll error out later on in the build. 
Cc: "10.5 10.6" Signed-off-by: Emil Velikov Reviewed-by: Eric Anholt --- configure.ac | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configure.ac b/configure.ac index 8e62bd893ee..56095baa7e8 100644 --- a/configure.ac +++ b/configure.ac @@ -1543,6 +1543,9 @@ if test "x$enable_egl" = xyes; then if test "$enable_static" != yes; then if test "x$enable_dri" = xyes; then HAVE_EGL_DRIVER_DRI2=1 + if test "x$enable_shared_glapi" = xno; then + AC_MSG_ERROR([egl_dri2 requires --enable-shared-glapi]) + fi else # Avoid building an "empty" libEGL. Drop/update this # when other backends (haiku?) come along. From 6ed52f78a05a2b56eb521c50767b7a991df51564 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 19 Jun 2015 17:48:30 +0100 Subject: [PATCH 804/834] configure: drop unused variable GBM_BACKEND_DIRS Signed-off-by: Emil Velikov Reviewed-by: Eric Anholt --- configure.ac | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.ac b/configure.ac index 56095baa7e8..ddc757e1629 100644 --- a/configure.ac +++ b/configure.ac @@ -1507,7 +1507,6 @@ if test "x$enable_gbm" = xyes; then fi if test "x$enable_dri" = xyes; then - GBM_BACKEND_DIRS="$GBM_BACKEND_DIRS dri" if test "x$enable_shared_glapi" = xno; then AC_MSG_ERROR([gbm_dri requires --enable-shared-glapi]) fi From 828f13330c9384f2b55c8b0f962d93a74ecd0601 Mon Sep 17 00:00:00 2001 From: Frank Henigman Date: Thu, 6 Nov 2014 16:29:26 -0500 Subject: [PATCH 805/834] gbm: dlopen libglapi so gbm_create_device works Dri driver libs are not linked to pull in libglapi so gbm_create_device() fails when it tries to dlopen them (unless the application is linked with something that does pull in libglapi, like libGL). Until dri drivers can be fixed properly, dlopen libglapi before trying to dlopen them. 
Cc: "10.5 10.6" Signed-off-by: Frank Henigman [Emil Velikov: Drop misleading bugzilla link, mention that libname differs] Reviewed-by: Emil Velikov --- src/gbm/backends/dri/gbm_dri.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index 62bdf891d57..ccc3cc6930f 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -311,6 +311,14 @@ dri_open_driver(struct gbm_dri_device *dri) if (search_paths == NULL) search_paths = DEFAULT_DRIVER_DIR; + /* Temporarily work around dri driver libs that need symbols in libglapi + * but don't automatically link it in. + */ + /* XXX: Library name differs on per platforms basis. Update this as + * osx/cygwin/windows/bsd gets support for GBM.. + */ + dlopen("libglapi.so.0", RTLD_LAZY | RTLD_GLOBAL); + dri->driver = NULL; end = search_paths + strlen(search_paths); for (p = search_paths; p < end && dri->driver == NULL; p = next + 1) { From a0dc6b7824d3b9095919e29393a379ea7f9c1318 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 19 Jun 2015 19:22:38 +0100 Subject: [PATCH 806/834] gbm: do not (over)link against libglapi.so The whole of GBM does not rely on even a single symbol from the GL dispatch library, unsurprisingly. The only need for it comes from the unresolved symbols in the DRI modules, which are now correctly handled with Frank's commit.
Cc: "10.5 10.6" Signed-off-by: Emil Velikov --- src/gbm/Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gbm/Makefile.am b/src/gbm/Makefile.am index dbd4f833867..918fdf7d6ad 100644 --- a/src/gbm/Makefile.am +++ b/src/gbm/Makefile.am @@ -52,7 +52,8 @@ libgbm_dri_la_CFLAGS = \ $(LIBDRM_CFLAGS) libgbm_la_LIBADD += \ - libgbm_dri.la $(top_builddir)/src/mapi/shared-glapi/libglapi.la $(LIBDRM_LIBS) + libgbm_dri.la \ + $(LIBDRM_LIBS) endif TESTS = gbm-symbols-check From 59f8d4ee793a1b620fb385f53b4dfe10e4b70f19 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 19 Jun 2015 19:35:19 +0100 Subject: [PATCH 807/834] android: egl: do not link against libglapi The only reason we touch glapi is to dlopen it in order to: - make sure that the unresolved _glapi* symbols in the dri modules are provided. - fetch glFlush() and use it at various stages in the dri2 driver. Cc: Chih-Wei Huang Cc: Eric Anholt Signed-off-by: Emil Velikov --- src/egl/main/Android.mk | 1 - 1 file changed, 1 deletion(-) diff --git a/src/egl/main/Android.mk b/src/egl/main/Android.mk index 8f687e9f255..0ba72953960 100644 --- a/src/egl/main/Android.mk +++ b/src/egl/main/Android.mk @@ -44,7 +44,6 @@ LOCAL_CFLAGS := \ -D_EGL_OS_UNIX=1 LOCAL_SHARED_LIBRARIES := \ - libglapi \ libdl \ libhardware \ liblog \ From fccf012adc0d3aad877de095244324aa1d2d046a Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Tue, 23 Jun 2015 00:16:59 -0400 Subject: [PATCH 808/834] glsl: binding point is a texture unit, which is a combined space This fixes compilation failures in Dota 2 Reborn where a texture unit binding point was used that was numerically higher than the max per stage. 
Signed-off-by: Ilia Mirkin Reviewed-by: Chris Forbes Reviewed-by: Timothy Arceri Tested-by: Nick Sarnie Cc: "10.5 10.6" --- src/glsl/ast_to_hir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 259e01e0ca4..8cb46beab1e 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2086,7 +2086,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, * with an array of size N, all elements of the array from binding * through binding + N - 1 must be within this range." */ - unsigned limit = ctx->Const.Program[state->stage].MaxTextureImageUnits; + unsigned limit = ctx->Const.MaxCombinedTextureImageUnits; if (max_index >= limit) { _mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers " From 9fcbf515b431a92e0289f234ab77a796cf2a5612 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 21 Jun 2015 15:00:16 -0400 Subject: [PATCH 809/834] nvc0: always put all tfb bufs into bufctx Since we clear the TFB bufctx binding point above, we need to put all of the active tfb's back in, even if they haven't changed since last time. Otherwise the tfb may get moved into sysmem and the underlying mapping will generate write errors. 
Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index c9b5a5cbfc1..e0842784a88 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -262,11 +262,13 @@ nvc0_tfb_validate(struct nvc0_context *nvc0) if (tfb) targ->stride = tfb->stride[b]; + buf = nv04_resource(targ->pipe.buffer); + + BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR); + if (!(nvc0->tfbbuf_dirty & (1 << b))) continue; - buf = nv04_resource(targ->pipe.buffer); - if (!targ->clean) nvc0_query_fifo_wait(push, targ->pq); BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5); @@ -280,7 +282,6 @@ nvc0_tfb_validate(struct nvc0_context *nvc0) PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */ targ->clean = FALSE; } - BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR); } for (; b < 4; ++b) IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0); From 78d58e642549fbf340fdb4fca06720d2891216a8 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 21 Jun 2015 19:03:35 -0400 Subject: [PATCH 810/834] nv50,nvc0: make sure to pushbuf_refn before putting bo into pushbuf_data Without first running the bo through pushbuf_refn, the nouveau drm library will have uninitialized structures regarding this bo, and will insert incorrect data. This fixes supertuxkart 0.9 crash on start (where it ends up doing a lot of indirect draws). 
Signed-off-by: Ilia Mirkin Cc: "10.5 10.6" --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 1 + 4 files changed, 4 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 55fcac86bd4..81f7474e36b 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -452,6 +452,7 @@ nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, /* XXX: does this exist ? */ #define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8)) + PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART); nouveau_pushbuf_space(push, 0, 0, 1); nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 | NV50_IB_ENTRY_1_NO_PREFETCH); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index c1590eefe9f..1fd33b8aa59 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -628,6 +628,7 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten, BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1); PUSH_DATA (push, prim); + PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain); nouveau_pushbuf_space(push, 8, 0, 1); switch (index_size) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index 74f210cbf47..aea6cbda02d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -617,6 +617,7 @@ nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push, #define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8)) + PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART); nouveau_pushbuf_space(push, 0, 0, 1); nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); diff --git 
a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 657b8c0fe82..8cf2584b0ce 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -829,6 +829,7 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) } PUSH_DATA(push, nvc0_prim_gl(info->mode)); #define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8)) + PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain); nouveau_pushbuf_space(push, 0, 0, 1); nouveau_pushbuf_data(push, buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | size); From 20dca37a20f90762df13efd0e0ec97002b6a89f2 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 17 Jun 2015 15:50:13 -0700 Subject: [PATCH 811/834] i965/gen9: Don't use encrypted MOCS On gen9+ MOCS is an index into a table. It is 7 bits, and AFAICT, bit 0 is for doing encrypted reads. I don't recall how I decided to do this for BXT. I don't know this patch was ever needed, since it seems nothing is broken today on SKL. Furthermore, this patch may no longer be needed because of the ongoing changes with MOCS setup. It is what is being used/tested, so it's included in the series. The chosen values are the old values left shifted. That was also an arbitrary choice. v2: Use shift in MOCS to make it clear what we're doing. (Ken) Signed-off-by: Ben Widawsky Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_defines.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index bfcc4424c3b..66b9abc9991 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2495,8 +2495,8 @@ enum brw_wm_barycentric_interp_mode { * cache settings. We still use only either write-back or write-through; and * rely on the documented default values. 
*/ -#define SKL_MOCS_WB 9 -#define SKL_MOCS_WT 5 +#define SKL_MOCS_WB (0b001001 << 1) +#define SKL_MOCS_WT (0b000101 << 1) #define MEDIA_VFE_STATE 0x7000 /* GEN7 DW2, GEN8+ DW3 */ From 3fa9bb81ec8b21f472de32e08d0caf917239da08 Mon Sep 17 00:00:00 2001 From: Boyan Ding Date: Sat, 13 Jun 2015 15:33:20 +0800 Subject: [PATCH 812/834] egl/x11: Remove duplicate call to dri2_x11_add_configs_for_visuals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to dri2_x11_add_configs_for_visuals (previously dri2_add_configs_for_visuals) was moved downwards in commit f8c5b8a1, but appeared again in its original position after its rename in d019cd81. Remove it. Cc: "10.5 10.6" Signed-off-by: Boyan Ding Reviewed-by: Samuel Iglesias Gonsálvez Reviewed-by: Emil Velikov Reviewed-by: Chad Versace --- src/egl/drivers/dri2/platform_x11.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index 8a34bb19bd5..56c14288204 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -1271,11 +1271,6 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp) dri2_x11_setup_swap_interval(dri2_dpy); - if (dri2_dpy->conn) { - if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp)) - goto cleanup_configs; - } - disp->Extensions.KHR_image_pixmap = EGL_TRUE; disp->Extensions.NOK_swap_region = EGL_TRUE; disp->Extensions.NOK_texture_from_pixmap = EGL_TRUE; From a49328d58d1e3e143f9434976d9f3574acefc4ea Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 22 Jun 2015 10:59:33 -0700 Subject: [PATCH 813/834] i965/fs: Don't mess up stride for uniform integer multiplication. If the stride is 0, the source is a uniform and we should not modify the stride. 
Cc: "10.6" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91047 Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5563c5aa76c..903624c97cf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3196,10 +3196,16 @@ fs_visitor::lower_integer_multiplication() src1_1_w.fixed_hw_reg.dw1.ud >>= 16; } else { src1_0_w.type = BRW_REGISTER_TYPE_UW; - src1_0_w.stride = 2; + if (src1_0_w.stride != 0) { + assert(src1_0_w.stride == 1); + src1_0_w.stride = 2; + } src1_1_w.type = BRW_REGISTER_TYPE_UW; - src1_1_w.stride = 2; + if (src1_1_w.stride != 0) { + assert(src1_1_w.stride == 1); + src1_1_w.stride = 2; + } src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); } ibld.MUL(low, inst->src[0], src1_0_w); @@ -3209,10 +3215,16 @@ fs_visitor::lower_integer_multiplication() fs_reg src0_1_w = inst->src[0]; src0_0_w.type = BRW_REGISTER_TYPE_UW; - src0_0_w.stride = 2; + if (src0_0_w.stride != 0) { + assert(src0_0_w.stride == 1); + src0_0_w.stride = 2; + } src0_1_w.type = BRW_REGISTER_TYPE_UW; - src0_1_w.stride = 2; + if (src0_1_w.stride != 0) { + assert(src0_1_w.stride == 1); + src0_1_w.stride = 2; + } src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); ibld.MUL(low, src0_0_w, inst->src[1]); From 04758d25b4240129d4fa8784608a54c40bff3568 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 19 Jun 2015 20:40:15 -0700 Subject: [PATCH 814/834] mesa: Delete unused ICEIL(). Can't find any uses of it in git history. 
Reviewed-by: Jordan Justen --- src/mesa/main/imports.h | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index c4d917ebba4..9ffe3decd0f 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -230,38 +230,6 @@ static inline int IFLOOR(float f) } -/** Return (as an integer) ceiling of float */ -static inline int ICEIL(float f) -{ -#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__) - /* - * IEEE ceil for computers that round to nearest or even. - * 'f' must be between -4194304 and 4194303. - * This ceil operation is done by "(iround(f + .5) + iround(f - .5) + 1) >> 1", - * but uses some IEEE specific tricks for better speed. - * Contributed by Josh Vanderhoof - */ - int ai, bi; - double af, bf; - af = (3 << 22) + 0.5 + (double)f; - bf = (3 << 22) + 0.5 - (double)f; - /* GCC generates an extra fstp/fld without this. */ - __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st"); - __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st"); - return (ai - bi + 1) >> 1; -#else - int ai, bi; - double af, bf; - fi_type u; - af = (3 << 22) + 0.5 + (double)f; - bf = (3 << 22) + 0.5 - (double)f; - u.f = (float) af; ai = u.i; - u.f = (float) bf; bi = u.i; - return (ai - bi + 1) >> 1; -#endif -} - - /** * Is x a power of two? */ From 4d93a07c45c8aa4cb3adbfcb9d61dcb54d8c404f Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 22 Jun 2015 11:09:49 -0700 Subject: [PATCH 815/834] i965/cfg: Assert that cur_do/while/if pointers are non-NULL. Coverity sees that the functions immediately below the new assertions dereference these pointers, but is unaware that an ENDIF always follows an IF, etc. 
Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_cfg.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp index 39c419b9b96..f1f230e3751 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp @@ -231,6 +231,7 @@ cfg_t::cfg_t(exec_list *instructions) if (cur_else) { cur_else->add_successor(mem_ctx, cur_endif); } else { + assert(cur_if != NULL); cur_if->add_successor(mem_ctx, cur_endif); } @@ -299,6 +300,7 @@ cfg_t::cfg_t(exec_list *instructions) inst->exec_node::remove(); cur->instructions.push_tail(inst); + assert(cur_do != NULL && cur_while != NULL); cur->add_successor(mem_ctx, cur_do); set_next_block(&cur, cur_while, ip); From d8eeb4917ca39a0698731f64933c85a7c44e9247 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 22 Jun 2015 11:20:32 -0700 Subject: [PATCH 816/834] i965: Assert that the GL primitive isn't out of range. Coverity sees the if (mode >= BRW_PRIM_OFFSET (128)) test and assumes that the else-branch might execute for mode up to 127, which would be out of bounds. Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_draw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index a7164dbf7d8..b91597a9f5d 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -92,8 +92,10 @@ get_hw_prim_for_gl_prim(int mode) { if (mode >= BRW_PRIM_OFFSET) return mode - BRW_PRIM_OFFSET; - else + else { + assert(mode < ARRAY_SIZE(prim_to_hw_prim)); return prim_to_hw_prim[mode]; + } } From ae097580ac49fbfaf184c89c68cb42b755f62939 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 22 Jun 2015 11:42:15 -0700 Subject: [PATCH 817/834] i965: Initialize backend_shader::mem_ctx in its constructor. We were initializing it in each subclasses' constructors for some reason.
Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +--- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ src/mesa/drivers/dri/i965/brw_shader.h | 1 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 3 +-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 4770838b26f..dc992dd332e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1984,13 +1984,11 @@ fs_visitor::fs_visitor(struct brw_context *brw, struct gl_shader_program *shader_prog, struct gl_program *prog, unsigned dispatch_width) - : backend_shader(brw, shader_prog, prog, prog_data, stage), + : backend_shader(brw, mem_ctx, shader_prog, prog, prog_data, stage), key(key), prog_data(prog_data), dispatch_width(dispatch_width), promoted_constants(0), bld(fs_builder(this, dispatch_width).at_end()) { - this->mem_ctx = mem_ctx; - switch (stage) { case MESA_SHADER_FRAGMENT: key_tex = &((const brw_wm_prog_key *) key)->tex; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 545ec2679ae..7a26939cbe9 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -757,6 +757,7 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) } backend_shader::backend_shader(struct brw_context *brw, + void *mem_ctx, struct gl_shader_program *shader_prog, struct gl_program *prog, struct brw_stage_prog_data *stage_prog_data, @@ -769,6 +770,7 @@ backend_shader::backend_shader(struct brw_context *brw, shader_prog(shader_prog), prog(prog), stage_prog_data(stage_prog_data), + mem_ctx(mem_ctx), cfg(NULL), stage(stage) { diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index da01d2f7185..e64774901bc 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -215,6 
+215,7 @@ class backend_shader { protected: backend_shader(struct brw_context *brw, + void *mem_ctx, struct gl_shader_program *shader_prog, struct gl_program *prog, struct brw_stage_prog_data *stage_prog_data, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 0a76bde7726..669f76973ea 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -3691,7 +3691,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, shader_time_shader_type st_base, shader_time_shader_type st_written, shader_time_shader_type st_reset) - : backend_shader(brw, shader_prog, prog, &prog_data->base, stage), + : backend_shader(brw, mem_ctx, shader_prog, prog, &prog_data->base, stage), c(c), key(key), prog_data(prog_data), @@ -3704,7 +3704,6 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, st_written(st_written), st_reset(st_reset) { - this->mem_ctx = mem_ctx; this->failed = false; this->base_ir = NULL; From 630764407aeba4acf9364739bafb0e3516f72e31 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 19 Jun 2015 17:19:38 -0700 Subject: [PATCH 818/834] i965: Replace some instances of brw->gen with devinfo->gen --- src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 903624c97cf..54dfe34c686 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3187,7 +3187,7 @@ fs_visitor::lower_integer_multiplication() fs_reg high(GRF, alloc.allocate(dispatch_width / 8), inst->dst.type, dispatch_width); - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { fs_reg src1_0_w = inst->src[1]; fs_reg src1_1_w = inst->src[1]; @@ -3628,7 +3628,7 @@ fs_visitor::setup_vs_payload() void fs_visitor::setup_cs_payload() { - assert(brw->gen >= 7); + 
assert(devinfo->gen >= 7); payload.num_regs = 1; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index dc992dd332e..3af9d78598c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1344,7 +1344,7 @@ fs_visitor::emit_interpolation_setup_gen6() struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); fs_builder abld = bld.annotate("compute pixel centers"); - if (brw->gen >= 8 || dispatch_width == 8) { + if (devinfo->gen >= 8 || dispatch_width == 8) { /* The "Register Region Restrictions" page says for BDW (and newer, * presumably): * @@ -1623,7 +1623,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, /* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD * will do this for us if we just give it a COMPR4 destination. */ - if (brw->gen < 6 && exec_size == 16) + if (devinfo->gen < 6 && exec_size == 16) load->dst.reg |= BRW_MRF_COMPR4; write = ubld.emit(FS_OPCODE_FB_WRITE); @@ -1934,7 +1934,7 @@ fs_visitor::emit_urb_writes() void fs_visitor::emit_cs_terminate() { - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* We are getting the thread ID from the compute shader header */ assert(stage == MESA_SHADER_COMPUTE); @@ -1956,7 +1956,7 @@ fs_visitor::emit_cs_terminate() void fs_visitor::emit_barrier() { - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* We are getting the barrier ID from the compute shader header */ assert(stage == MESA_SHADER_COMPUTE); From b0ad3ce4e7b9a23ab8fad4823e3c1094d6cf42a6 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 18 Apr 2015 12:23:33 -0700 Subject: [PATCH 819/834] mesa: Add a va_args variant of _mesa_gl_debug(). This will be useful for wrapper functions. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Chris Forbes --- src/mesa/main/errors.c | 29 +++++++++++++++++++++-------- src/mesa/main/errors.h | 9 +++++++++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c index 16f10ddb694..b3406665d94 100644 --- a/src/mesa/main/errors.c +++ b/src/mesa/main/errors.c @@ -1412,6 +1412,26 @@ should_output(struct gl_context *ctx, GLenum error, const char *fmtString) } +void +_mesa_gl_vdebug(struct gl_context *ctx, + GLuint *id, + enum mesa_debug_source source, + enum mesa_debug_type type, + enum mesa_debug_severity severity, + const char *fmtString, + va_list args) +{ + char s[MAX_DEBUG_MESSAGE_LENGTH]; + int len; + + debug_get_id(id); + + len = _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args); + + log_msg(ctx, source, type, *id, severity, len, s); +} + + void _mesa_gl_debug(struct gl_context *ctx, GLuint *id, @@ -1420,17 +1440,10 @@ _mesa_gl_debug(struct gl_context *ctx, enum mesa_debug_severity severity, const char *fmtString, ...) { - char s[MAX_DEBUG_MESSAGE_LENGTH]; - int len; va_list args; - - debug_get_id(id); - va_start(args, fmtString); - len = _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args); + _mesa_gl_vdebug(ctx, id, source, type, severity, fmtString, args); va_end(args); - - log_msg(ctx, source, type, *id, severity, len, s); } diff --git a/src/mesa/main/errors.h b/src/mesa/main/errors.h index e6dc9b5f1b9..24f234f7f10 100644 --- a/src/mesa/main/errors.h +++ b/src/mesa/main/errors.h @@ -75,6 +75,15 @@ _mesa_log(const char *fmtString, ...) 
PRINTFLIKE(1, 2); extern FILE * _mesa_get_log_file(void); +extern void +_mesa_gl_vdebug(struct gl_context *ctx, + GLuint *id, + enum mesa_debug_source source, + enum mesa_debug_type type, + enum mesa_debug_severity severity, + const char *fmtString, + va_list args); + extern void _mesa_gl_debug(struct gl_context *ctx, GLuint *id, From e639a6f68e701f23b977a49c45d646c164991d36 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 16 Apr 2015 14:13:52 -0700 Subject: [PATCH 820/834] i965: Plumb compiler debug logging through a function pointer in brw_compiler v2 (Ken): Make shader_debug_log a printf-like function. v3 (Jason): Add a void * to pass the brw_context through Reviewed-by: Chris Forbes Reviewed-by: Kenneth Graunke --- .../drivers/dri/i965/brw_blorp_blit_eu.cpp | 3 ++- src/mesa/drivers/dri/i965/brw_cs.cpp | 3 ++- src/mesa/drivers/dri/i965/brw_fs.cpp | 3 ++- src/mesa/drivers/dri/i965/brw_fs.h | 4 ++-- .../drivers/dri/i965/brw_fs_generator.cpp | 20 ++++++++---------- src/mesa/drivers/dri/i965/brw_shader.cpp | 16 ++++++++++++++ src/mesa/drivers/dri/i965/brw_shader.h | 2 ++ src/mesa/drivers/dri/i965/brw_vec4.cpp | 6 ++++-- src/mesa/drivers/dri/i965/brw_vec4.h | 4 ++-- .../drivers/dri/i965/brw_vec4_generator.cpp | 21 +++++++------------ .../drivers/dri/i965/brw_vec4_gs_visitor.cpp | 3 ++- 11 files changed, 51 insertions(+), 34 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index c1b760920d9..9c041379616 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -29,7 +29,8 @@ brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw, bool debug_flag) : mem_ctx(ralloc_context(NULL)), - generator(brw, mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key), + generator(brw->intelScreen->compiler, + mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key), (struct brw_stage_prog_data *) rzalloc(mem_ctx, struct 
brw_wm_prog_data), NULL, 0, false, "BLORP") { diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 1f2a9d2ea67..f93ca2f7e74 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -128,7 +128,8 @@ brw_cs_emit(struct brw_context *brw, return NULL; } - fs_generator g(brw, mem_ctx, (void*) key, &prog_data->base, &cp->Base, + fs_generator g(brw->intelScreen->compiler, + mem_ctx, (void*) key, &prog_data->base, &cp->Base, v8.promoted_constants, v8.runtime_check_aads_emit, "CS"); if (INTEL_DEBUG & DEBUG_CS) { char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d", diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 54dfe34c686..d66378abf2b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4081,7 +4081,8 @@ brw_wm_fs_emit(struct brw_context *brw, prog_data->no_8 = false; } - fs_generator g(brw, mem_ctx, (void *) key, &prog_data->base, + fs_generator g(brw->intelScreen->compiler, + mem_ctx, (void *) key, &prog_data->base, &fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS"); if (unlikely(INTEL_DEBUG & DEBUG_WM)) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index cdeea6d9988..7414b65222d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -398,7 +398,7 @@ public: class fs_generator { public: - fs_generator(struct brw_context *brw, + fs_generator(const struct brw_compiler *compiler, void *mem_ctx, const void *key, struct brw_stage_prog_data *prog_data, @@ -493,7 +493,7 @@ private: bool patch_discard_jumps_to_fb_writes(); - struct brw_context *brw; + const struct brw_compiler *compiler; const struct brw_device_info *devinfo; struct brw_codegen *p; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 8eb3ace5c0a..d98a40d2fce 100644 --- 
a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -121,7 +121,7 @@ brw_reg_from_fs_reg(fs_reg *reg) return brw_reg; } -fs_generator::fs_generator(struct brw_context *brw, +fs_generator::fs_generator(const struct brw_compiler *compiler, void *mem_ctx, const void *key, struct brw_stage_prog_data *prog_data, @@ -130,7 +130,7 @@ fs_generator::fs_generator(struct brw_context *brw, bool runtime_check_aads_emit, const char *stage_abbrev) - : brw(brw), devinfo(brw->intelScreen->devinfo), key(key), + : compiler(compiler), devinfo(compiler->devinfo), key(key), prog_data(prog_data), prog(prog), promoted_constants(promoted_constants), runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false), @@ -2173,15 +2173,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) ralloc_free(annotation.ann); } - static GLuint msg_id = 0; - _mesa_gl_debug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, - "%s SIMD%d shader: %d inst, %d loops, %d:%d spills:fills, " - "Promoted %u constants, compacted %d to %d bytes.\n", - stage_abbrev, dispatch_width, before_size / 16, loop_count, - spill_count, fill_count, promoted_constants, before_size, after_size); + compiler->shader_debug_log(log_data, + "%s SIMD%d shader: %d inst, %d loops, " + "%d:%d spills:fills, Promoted %u constants, " + "compacted %d to %d bytes.\n", + stage_abbrev, dispatch_width, before_size / 16, + loop_count, spill_count, fill_count, + promoted_constants, before_size, after_size); return start_offset; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 7a26939cbe9..6bc9dd9e53c 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -32,12 +32,28 @@ #include "glsl/glsl_parser_extras.h" #include "main/shaderapi.h" +static void +shader_debug_log_mesa(void *data, const char *fmt, ...) 
+{ + struct brw_context *brw = (struct brw_context *)data; + va_list args; + + va_start(args, fmt); + GLuint msg_id = 0; + _mesa_gl_vdebug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_OTHER, + MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args); + va_end(args); +} + struct brw_compiler * brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) { struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); compiler->devinfo = devinfo; + compiler->shader_debug_log = shader_debug_log_mesa; brw_fs_alloc_reg_sets(compiler); brw_vec4_alloc_reg_set(compiler); diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index e64774901bc..30cca5c7cca 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -86,6 +86,8 @@ struct brw_compiler { */ int aligned_pairs_class; } fs_reg_sets[2]; + + void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); }; enum PACKED register_file { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 5a9c3f53218..5e549c46d6b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1910,7 +1910,8 @@ brw_vs_emit(struct brw_context *brw, return NULL; } - fs_generator g(brw, mem_ctx, (void *) &c->key, &prog_data->base.base, + fs_generator g(brw->intelScreen->compiler, + mem_ctx, (void *) &c->key, &prog_data->base.base, &c->vp->program.Base, v.promoted_constants, v.runtime_check_aads_emit, "VS"); if (INTEL_DEBUG & DEBUG_VS) { @@ -1947,7 +1948,8 @@ brw_vs_emit(struct brw_context *brw, return NULL; } - vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base, + vec4_generator g(brw->intelScreen->compiler, + prog, &c->vp->program.Base, &prog_data->base, mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS"); assembly = g.generate_assembly(v.cfg, final_assembly_size); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h index 4449e0a52ce..067de7537fc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -427,7 +427,7 @@ private: class vec4_generator { public: - vec4_generator(struct brw_context *brw, + vec4_generator(const struct brw_compiler *compiler, struct gl_shader_program *shader_prog, struct gl_program *prog, struct brw_vue_prog_data *prog_data, @@ -509,7 +509,7 @@ private: struct brw_reg dst); void generate_unpack_flags(struct brw_reg dst); - struct brw_context *brw; + const struct brw_compiler *compiler; const struct brw_device_info *devinfo; struct brw_codegen *p; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 032b5c28091..c633fe0e130 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -134,7 +134,7 @@ vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i) return brw_reg; } -vec4_generator::vec4_generator(struct brw_context *brw, +vec4_generator::vec4_generator(const struct brw_compiler *compiler, struct gl_shader_program *shader_prog, struct gl_program *prog, struct brw_vue_prog_data *prog_data, @@ -142,13 +142,13 @@ vec4_generator::vec4_generator(struct brw_context *brw, bool debug_flag, const char *stage_name, const char *stage_abbrev) - : brw(brw), devinfo(brw->intelScreen->devinfo), + : compiler(compiler), devinfo(compiler->devinfo), shader_prog(shader_prog), prog(prog), prog_data(prog_data), mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev), debug_flag(debug_flag) { p = rzalloc(mem_ctx, struct brw_codegen); - brw_init_codegen(brw->intelScreen->devinfo, p, mem_ctx); + brw_init_codegen(devinfo, p, mem_ctx); } vec4_generator::~vec4_generator() @@ -1626,16 +1626,11 @@ vec4_generator::generate_code(const cfg_t *cfg) ralloc_free(annotation.ann); } - static GLuint msg_id = 0; - _mesa_gl_debug(&brw->ctx, &msg_id, - 
MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, - "%s vec4 shader: %d inst, %d loops, " - "compacted %d to %d bytes.\n", - stage_abbrev, - before_size / 16, loop_count, - before_size, after_size); + compiler->shader_debug_log(log_data, + "%s vec4 shader: %d inst, %d loops, " + "compacted %d to %d bytes.\n", + stage_abbrev, before_size / 16, loop_count, + before_size, after_size); } const unsigned * diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index eacb2f5be7b..82b8e08398b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -629,7 +629,8 @@ generate_assembly(struct brw_context *brw, const cfg_t *cfg, unsigned *final_assembly_size) { - vec4_generator g(brw, shader_prog, prog, prog_data, mem_ctx, + vec4_generator g(brw->intelScreen->compiler, + shader_prog, prog, prog_data, mem_ctx, INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); return g.generate_assembly(cfg, final_assembly_size); } From d7565b7d65f8203c20735a61b86e9158b8ec4447 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 16 Apr 2015 14:34:04 -0700 Subject: [PATCH 821/834] i965: Remove the dependence on brw_context from the generators Reviewed-by: Chris Forbes Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 2 +- src/mesa/drivers/dri/i965/brw_cs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs.h | 4 +++- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +++-- src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_vec4.h | 4 +++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 +- 9 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index 
9c041379616..789520c7353 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -29,7 +29,7 @@ brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw, bool debug_flag) : mem_ctx(ralloc_context(NULL)), - generator(brw->intelScreen->compiler, + generator(brw->intelScreen->compiler, brw, mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key), (struct brw_stage_prog_data *) rzalloc(mem_ctx, struct brw_wm_prog_data), NULL, 0, false, "BLORP") diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index f93ca2f7e74..0833404d5e5 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -128,7 +128,7 @@ brw_cs_emit(struct brw_context *brw, return NULL; } - fs_generator g(brw->intelScreen->compiler, + fs_generator g(brw->intelScreen->compiler, brw, mem_ctx, (void*) key, &prog_data->base, &cp->Base, v8.promoted_constants, v8.runtime_check_aads_emit, "CS"); if (INTEL_DEBUG & DEBUG_CS) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index d66378abf2b..6a02ab9e12b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4081,7 +4081,7 @@ brw_wm_fs_emit(struct brw_context *brw, prog_data->no_8 = false; } - fs_generator g(brw->intelScreen->compiler, + fs_generator g(brw->intelScreen->compiler, brw, mem_ctx, (void *) key, &prog_data->base, &fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS"); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 7414b65222d..1d52ff0d80d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -398,7 +398,7 @@ public: class fs_generator { public: - fs_generator(const struct brw_compiler *compiler, + fs_generator(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const void *key, struct brw_stage_prog_data *prog_data, @@ 
-494,6 +494,8 @@ private: bool patch_discard_jumps_to_fb_writes(); const struct brw_compiler *compiler; + void *log_data; /* Passed to compiler->*_log functions */ + const struct brw_device_info *devinfo; struct brw_codegen *p; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index d98a40d2fce..2ed0bac6fd9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -121,7 +121,7 @@ brw_reg_from_fs_reg(fs_reg *reg) return brw_reg; } -fs_generator::fs_generator(const struct brw_compiler *compiler, +fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const void *key, struct brw_stage_prog_data *prog_data, @@ -130,7 +130,8 @@ fs_generator::fs_generator(const struct brw_compiler *compiler, bool runtime_check_aads_emit, const char *stage_abbrev) - : compiler(compiler), devinfo(compiler->devinfo), key(key), + : compiler(compiler), log_data(log_data), + devinfo(compiler->devinfo), key(key), prog_data(prog_data), prog(prog), promoted_constants(promoted_constants), runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false), diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 5e549c46d6b..572bc174986 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1910,7 +1910,7 @@ brw_vs_emit(struct brw_context *brw, return NULL; } - fs_generator g(brw->intelScreen->compiler, + fs_generator g(brw->intelScreen->compiler, brw, mem_ctx, (void *) &c->key, &prog_data->base.base, &c->vp->program.Base, v.promoted_constants, v.runtime_check_aads_emit, "VS"); @@ -1948,7 +1948,7 @@ brw_vs_emit(struct brw_context *brw, return NULL; } - vec4_generator g(brw->intelScreen->compiler, + vec4_generator g(brw->intelScreen->compiler, brw, prog, &c->vp->program.Base, &prog_data->base, mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS"); assembly = 
g.generate_assembly(v.cfg, final_assembly_size); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 067de7537fc..2228d478d09 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -427,7 +427,7 @@ private: class vec4_generator { public: - vec4_generator(const struct brw_compiler *compiler, + vec4_generator(const struct brw_compiler *compiler, void *log_data, struct gl_shader_program *shader_prog, struct gl_program *prog, struct brw_vue_prog_data *prog_data, @@ -510,6 +510,8 @@ private: void generate_unpack_flags(struct brw_reg dst); const struct brw_compiler *compiler; + void *log_data; /* Passed to compiler->*_log functions */ + const struct brw_device_info *devinfo; struct brw_codegen *p; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index c633fe0e130..d2de2f0be25 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -135,6 +135,7 @@ vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i) } vec4_generator::vec4_generator(const struct brw_compiler *compiler, + void *log_data, struct gl_shader_program *shader_prog, struct gl_program *prog, struct brw_vue_prog_data *prog_data, @@ -142,7 +143,7 @@ vec4_generator::vec4_generator(const struct brw_compiler *compiler, bool debug_flag, const char *stage_name, const char *stage_abbrev) - : compiler(compiler), devinfo(compiler->devinfo), + : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo), shader_prog(shader_prog), prog(prog), prog_data(prog_data), mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev), debug_flag(debug_flag) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 82b8e08398b..b047aa18e0a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -629,7 +629,7 @@ generate_assembly(struct brw_context *brw, const cfg_t *cfg, unsigned *final_assembly_size) { - vec4_generator g(brw->intelScreen->compiler, + vec4_generator g(brw->intelScreen->compiler, brw, shader_prog, prog, prog_data, mem_ctx, INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); return g.generate_assembly(cfg, final_assembly_size); From 1bc3b62d4aad22b94b8031c29c654a8f90ccc24d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 16 Apr 2015 17:39:13 -0700 Subject: [PATCH 822/834] i965: Move INTEL_DEBUG variable parsing to screen creation time v2: Do bufmgr set_debug and set_aub_dump at screen time as well. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.c | 4 +++- src/mesa/drivers/dri/i965/intel_debug.c | 11 ++++------- src/mesa/drivers/dri/i965/intel_debug.h | 4 ++-- src/mesa/drivers/dri/i965/intel_screen.c | 2 ++ 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index c629f39bb2a..c0097df3e97 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -822,7 +822,9 @@ brwCreateContext(gl_api api, _mesa_meta_init(ctx); brw_process_driconf_options(brw); - brw_process_intel_debug_variable(brw); + + if (INTEL_DEBUG & DEBUG_PERF) + brw->perf_debug = true; if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS)) brw->scalar_vs = true; diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index 53f575ab78f..b68c2127f8d 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -88,25 +88,22 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage) } void -brw_process_intel_debug_variable(struct brw_context *brw) +brw_process_intel_debug_variable(struct intel_screen *screen) { uint64_t intel_debug = driParseDebugString(getenv("INTEL_DEBUG"), debug_control); (void) 
p_atomic_cmpxchg(&INTEL_DEBUG, 0, intel_debug); if (INTEL_DEBUG & DEBUG_BUFMGR) - dri_bufmgr_set_debug(brw->bufmgr, true); + dri_bufmgr_set_debug(screen->bufmgr, true); - if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && brw->gen < 7) { + if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && screen->devinfo->gen < 7) { fprintf(stderr, "shader_time debugging requires gen7 (Ivybridge) or better.\n"); INTEL_DEBUG &= ~DEBUG_SHADER_TIME; } - if (INTEL_DEBUG & DEBUG_PERF) - brw->perf_debug = true; - if (INTEL_DEBUG & DEBUG_AUB) - drm_intel_bufmgr_gem_set_aub_dump(brw->bufmgr, true); + drm_intel_bufmgr_gem_set_aub_dump(screen->bufmgr, true); } /** diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index f754be20b1d..4689492e1fd 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ b/src/mesa/drivers/dri/i965/intel_debug.h @@ -114,8 +114,8 @@ extern uint64_t INTEL_DEBUG; extern uint64_t intel_debug_flag_for_shader_stage(gl_shader_stage stage); -struct brw_context; +struct intel_screen; -extern void brw_process_intel_debug_variable(struct brw_context *brw); +extern void brw_process_intel_debug_variable(struct intel_screen *); extern bool brw_env_var_as_boolean(const char *var_name, bool default_value); diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 896a12534e6..3ba75d8278b 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1372,6 +1372,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) if (!intelScreen->devinfo) return false; + brw_process_intel_debug_variable(intelScreen); + intelScreen->hw_must_use_separate_stencil = intelScreen->devinfo->gen >= 7; intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen); From f45bf97f30f2feacf8f976271a43feea70e5c382 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Jun 2015 16:30:04 -0700 Subject: [PATCH 823/834] i965/fs: Make no16 non-variadic We never used the fact that it was 
variadic anyway. Reviewed-by: Chris Forbes Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 14 ++++---------- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 6a02ab9e12b..5d18dda4676 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -703,26 +703,20 @@ fs_visitor::fail(const char *format, ...) * During a SIMD16 compile (if one happens anyway), this just calls fail(). */ void -fs_visitor::no16(const char *format, ...) +fs_visitor::no16(const char *msg) { - va_list va; - - va_start(va, format); - if (dispatch_width == 16) { - vfail(format, va); + fail("%s", msg); } else { simd16_unsupported = true; if (brw->perf_debug) { if (no16_msg) - ralloc_vasprintf_append(&no16_msg, format, va); + ralloc_strcat(&no16_msg, msg); else - no16_msg = ralloc_vasprintf(mem_ctx, format, va); + no16_msg = ralloc_strdup(mem_ctx, msg); } } - - va_end(va); } /** diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 1d52ff0d80d..cffedc0c6d1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -157,7 +157,7 @@ public: fs_inst *inst); void vfail(const char *msg, va_list args); void fail(const char *msg, ...); - void no16(const char *msg, ...); + void no16(const char *msg); void lower_uniform_pull_constant_loads(); bool lower_load_payload(); bool lower_integer_multiplication(); From 3fd457c9ddd4b9f730e70bfd19b2f9eeeeaef089 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Jun 2015 16:32:06 -0700 Subject: [PATCH 824/834] i965/fs: Do the no16 perf logging directly in fs_visitor::no16() While we're at it, we'll drop the note about 10-20% performance loss. 
Reviewed-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_fs.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5d18dda4676..3b311ca4093 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -710,12 +710,7 @@ fs_visitor::no16(const char *msg) } else { simd16_unsupported = true; - if (brw->perf_debug) { - if (no16_msg) - ralloc_strcat(&no16_msg, msg); - else - no16_msg = ralloc_strdup(mem_ctx, msg); - } + perf_debug("SIMD16 shader failed to compile: %s", msg); } } @@ -4054,14 +4049,10 @@ brw_wm_fs_emit(struct brw_context *brw, /* Try a SIMD16 compile */ v2.import_uniforms(&v); if (!v2.run_fs()) { - perf_debug("SIMD16 shader failed to compile, falling back to " - "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg); + perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg); } else { simd16_cfg = v2.cfg; } - } else { - perf_debug("SIMD16 shader unsupported, falling back to " - "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg); } } From 073294d3ef20d0dbeffcc38aff3d69eda624ee75 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Jun 2015 17:01:22 -0700 Subject: [PATCH 825/834] i965/fs: Plumb compiler debug logging through brw_compiler Reviewed-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_fs.cpp | 13 ++++++++---- src/mesa/drivers/dri/i965/brw_shader.cpp | 26 ++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_shader.h | 1 + 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 3b311ca4093..0c11a9ef65f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -710,7 +710,9 @@ fs_visitor::no16(const char *msg) } else { simd16_unsupported = true; - perf_debug("SIMD16 shader failed to compile: %s", 
msg); + struct brw_compiler *compiler = brw->intelScreen->compiler; + compiler->shader_perf_log(brw, + "SIMD16 shader failed to compile: %s", msg); } } @@ -3800,9 +3802,12 @@ fs_visitor::allocate_registers() fail("Failure to register allocate. Reduce number of " "live scalar values to avoid this."); } else { - perf_debug("%s shader triggered register spilling. " - "Try reducing the number of live scalar values to " - "improve performance.\n", stage_name); + struct brw_compiler *compiler = brw->intelScreen->compiler; + compiler->shader_perf_log(brw, + "%s shader triggered register spilling. " + "Try reducing the number of live scalar " + "values to improve performance.\n", + stage_name); } /* Since we're out of heuristics, just go spill registers until we diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 6bc9dd9e53c..42d6236e6fd 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -47,6 +47,31 @@ shader_debug_log_mesa(void *data, const char *fmt, ...) va_end(args); } +static void +shader_perf_log_mesa(void *data, const char *fmt, ...) 
+{ + struct brw_context *brw = (struct brw_context *)data; + + va_list args; + va_start(args, fmt); + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + va_list args_copy; + va_copy(args_copy, args); + vfprintf(stderr, fmt, args_copy); + va_end(args_copy); + } + + if (brw->perf_debug) { + GLuint msg_id = 0; + _mesa_gl_vdebug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_PERFORMANCE, + MESA_DEBUG_SEVERITY_MEDIUM, fmt, args); + } + va_end(args); +} + struct brw_compiler * brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) { @@ -54,6 +79,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->devinfo = devinfo; compiler->shader_debug_log = shader_debug_log_mesa; + compiler->shader_perf_log = shader_perf_log_mesa; brw_fs_alloc_reg_sets(compiler); brw_vec4_alloc_reg_set(compiler); diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 30cca5c7cca..fe510e7f7c4 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -88,6 +88,7 @@ struct brw_compiler { } fs_reg_sets[2]; void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); + void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); }; enum PACKED register_file { From 6e255a3299c9ec5208cb5519b5da2edb0ce2972b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 16 Apr 2015 15:28:17 -0700 Subject: [PATCH 826/834] i965: Add compiler options to brw_compiler This creates the options at screen creation time and then we just copy them into the context at context creation time. We also move is_scalar to the brw_compiler structure. We also end up manually setting some values that the core would have set by default for us. Fortunately, there are only two non-zero shader compiler option defaults that we aren't overriding anyway so this isn't a big deal. 
Reviewed-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_context.c | 46 ++-------------------- src/mesa/drivers/dri/i965/brw_context.h | 1 - src/mesa/drivers/dri/i965/brw_shader.cpp | 50 +++++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_shader.h | 3 ++ src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- src/mesa/drivers/dri/i965/intel_screen.c | 1 + 6 files changed, 57 insertions(+), 46 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index c0097df3e97..cf408830620 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -50,6 +50,7 @@ #include "brw_context.h" #include "brw_defines.h" +#include "brw_shader.h" #include "brw_draw.h" #include "brw_state.h" @@ -68,8 +69,6 @@ #include "tnl/t_pipeline.h" #include "util/ralloc.h" -#include "glsl/nir/nir.h" - /*************************************** * Mesa's Driver Functions ***************************************/ @@ -558,48 +557,12 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128; } - static const nir_shader_compiler_options nir_options = { - .native_integers = true, - /* In order to help allow for better CSE at the NIR level we tell NIR - * to split all ffma instructions during opt_algebraic and we then - * re-combine them as a later step. - */ - .lower_ffma = true, - .lower_sub = true, - }; - /* We want the GLSL compiler to emit code that uses condition codes */ for (int i = 0; i < MESA_SHADER_STAGES; i++) { - ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 
16 : UINT_MAX; - ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput = - (i == MESA_SHADER_FRAGMENT); - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp = - (i == MESA_SHADER_FRAGMENT); - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false; - ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true; + ctx->Const.ShaderCompilerOptions[i] = + brw->intelScreen->compiler->glsl_compiler_options[i]; } - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true; - ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true; - - if (brw->scalar_vs) { - /* If we're using the scalar backend for vertex shaders, we need to - * configure these accordingly. - */ - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true; - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false; - - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options; - } - - ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions = &nir_options; - ctx->Const.ShaderCompilerOptions[MESA_SHADER_COMPUTE].NirOptions = &nir_options; - /* ARB_viewport_array */ if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) { ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS; @@ -826,9 +789,6 @@ brwCreateContext(gl_api api, if (INTEL_DEBUG & DEBUG_PERF) brw->perf_debug = true; - if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS)) - brw->scalar_vs = true; - brw_initialize_context_constants(brw); ctx->Const.ResetStrategy = notify_reset diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 58119ee5c5e..d8fcfffcd10 100644 --- 
a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1137,7 +1137,6 @@ struct brw_context bool has_pln; bool no_simd8; bool use_rep_send; - bool scalar_vs; /** * Some versions of Gen hardware don't do centroid interpolation correctly diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 42d6236e6fd..6e6a2580d1d 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -84,6 +84,54 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) brw_fs_alloc_reg_sets(compiler); brw_vec4_alloc_reg_set(compiler); + if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS)) + compiler->scalar_vs = true; + + nir_shader_compiler_options *nir_options = + rzalloc(compiler, nir_shader_compiler_options); + nir_options->native_integers = true; + /* In order to help allow for better CSE at the NIR level we tell NIR + * to split all ffma instructions during opt_algebraic and we then + * re-combine them as a later step. + */ + nir_options->lower_ffma = true; + nir_options->lower_sub = true; + + /* We want the GLSL compiler to emit code that uses condition codes */ + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + compiler->glsl_compiler_options[i].MaxUnrollIterations = 32; + compiler->glsl_compiler_options[i].MaxIfDepth = + devinfo->gen < 6 ? 
16 : UINT_MAX; + + compiler->glsl_compiler_options[i].EmitCondCodes = true; + compiler->glsl_compiler_options[i].EmitNoNoise = true; + compiler->glsl_compiler_options[i].EmitNoMainReturn = true; + compiler->glsl_compiler_options[i].EmitNoIndirectInput = true; + compiler->glsl_compiler_options[i].EmitNoIndirectOutput = + (i == MESA_SHADER_FRAGMENT); + compiler->glsl_compiler_options[i].EmitNoIndirectTemp = + (i == MESA_SHADER_FRAGMENT); + compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; + compiler->glsl_compiler_options[i].LowerClipDistance = true; + } + + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true; + compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true; + + if (compiler->scalar_vs) { + /* If we're using the scalar backend for vertex shaders, we need to + * configure these accordingly. + */ + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true; + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false; + + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options; + } + + compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options; + compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options; + return compiler; } @@ -139,7 +187,7 @@ is_scalar_shader_stage(struct brw_context *brw, int stage) case MESA_SHADER_FRAGMENT: return true; case MESA_SHADER_VERTEX: - return brw->scalar_vs; + return brw->intelScreen->compiler->scalar_vs; default: return false; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index fe510e7f7c4..ac4df738009 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -89,6 +89,9 @@ struct brw_compiler { void (*shader_debug_log)(void *, const char *str, ...) 
PRINTFLIKE(2, 3); void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); + + bool scalar_vs; + struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; }; enum PACKED register_file { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 572bc174986..82f055f229e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1882,7 +1882,7 @@ brw_vs_emit(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_VS)) brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base); - if (brw->scalar_vs) { + if (brw->intelScreen->compiler->scalar_vs) { if (!c->vp->program.Base.nir) { /* Normally we generate NIR in LinkShader() or * ProgramStringNotify(), but Mesa's fixed-function vertex program diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 3ba75d8278b..f9398d7859e 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -39,6 +39,7 @@ #include "swrast/s_renderbuffer.h" #include "util/ralloc.h" #include "brw_shader.h" +#include "glsl/nir/nir.h" #include "utils.h" #include "xmlpool.h" From c7893dc3c590b86787d8118e3920debaea3f16da Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 19 Jun 2015 14:46:03 -0700 Subject: [PATCH 827/834] i965: Use a single index per shader for shader_time. Previously, each shader took 3 shader time indices which were potentially at arbitrary points in the shader time buffer. Now, each shader gets a single index which refers to 3 consecutive locations in the buffer. This simplifies some of the logic at the cost of having a magic 3 in a few places. 
Reviewed-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_context.h | 14 +--- src/mesa/drivers/dri/i965/brw_fs.cpp | 28 +++----- src/mesa/drivers/dri/i965/brw_fs.h | 3 +- src/mesa/drivers/dri/i965/brw_program.c | 67 ++++++------------- src/mesa/drivers/dri/i965/brw_vec4.cpp | 18 ++--- src/mesa/drivers/dri/i965/brw_vec4.h | 10 +-- .../drivers/dri/i965/brw_vec4_gs_visitor.cpp | 3 +- .../drivers/dri/i965/brw_vec4_visitor.cpp | 8 +-- .../drivers/dri/i965/brw_vec4_vs_visitor.cpp | 2 +- .../dri/i965/test_vec4_copy_propagation.cpp | 3 +- .../dri/i965/test_vec4_register_coalesce.cpp | 3 +- 11 files changed, 55 insertions(+), 104 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d8fcfffcd10..a7d83f8d7b4 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -821,20 +821,10 @@ struct brw_tracked_state { enum shader_time_shader_type { ST_NONE, ST_VS, - ST_VS_WRITTEN, - ST_VS_RESET, ST_GS, - ST_GS_WRITTEN, - ST_GS_RESET, ST_FS8, - ST_FS8_WRITTEN, - ST_FS8_RESET, ST_FS16, - ST_FS16_WRITTEN, - ST_FS16_RESET, ST_CS, - ST_CS_WRITTEN, - ST_CS_RESET, }; struct brw_vertex_buffer { @@ -979,6 +969,8 @@ enum brw_predicate_state { BRW_PREDICATE_STATE_USE_BIT }; +struct shader_times; + /** * brw_context is derived from gl_context. 
*/ @@ -1503,7 +1495,7 @@ struct brw_context const char **names; int *ids; enum shader_time_shader_type *types; - uint64_t *cumulative; + struct shader_times *cumulative; int num_entries; int max_entries; double report_time; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 0c11a9ef65f..2839b9f7b89 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -578,38 +578,30 @@ fs_visitor::emit_shader_time_begin() void fs_visitor::emit_shader_time_end() { - enum shader_time_shader_type type, written_type, reset_type; + enum shader_time_shader_type type; switch (stage) { case MESA_SHADER_VERTEX: type = ST_VS; - written_type = ST_VS_WRITTEN; - reset_type = ST_VS_RESET; break; case MESA_SHADER_GEOMETRY: type = ST_GS; - written_type = ST_GS_WRITTEN; - reset_type = ST_GS_RESET; break; case MESA_SHADER_FRAGMENT: if (dispatch_width == 8) { type = ST_FS8; - written_type = ST_FS8_WRITTEN; - reset_type = ST_FS8_RESET; } else { assert(dispatch_width == 16); type = ST_FS16; - written_type = ST_FS16_WRITTEN; - reset_type = ST_FS16_RESET; } break; case MESA_SHADER_COMPUTE: type = ST_CS; - written_type = ST_CS_WRITTEN; - reset_type = ST_CS_RESET; break; default: unreachable("fs_visitor::emit_shader_time_end missing code"); } + int shader_time_index = brw_get_shader_time_index(brw, shader_prog, prog, + type); /* Insert our code just before the final SEND with EOT. */ exec_node *end = this->instructions.get_tail(); @@ -639,20 +631,20 @@ fs_visitor::emit_shader_time_end() * trying to determine the time taken for single instructions. 
*/ ibld.ADD(diff, diff, fs_reg(-2u)); - SHADER_TIME_ADD(ibld, type, diff); - SHADER_TIME_ADD(ibld, written_type, fs_reg(1u)); + SHADER_TIME_ADD(ibld, shader_time_index, 0, diff); + SHADER_TIME_ADD(ibld, shader_time_index, 1, fs_reg(1u)); ibld.emit(BRW_OPCODE_ELSE); - SHADER_TIME_ADD(ibld, reset_type, fs_reg(1u)); + SHADER_TIME_ADD(ibld, shader_time_index, 2, fs_reg(1u)); ibld.emit(BRW_OPCODE_ENDIF); } void fs_visitor::SHADER_TIME_ADD(const fs_builder &bld, - enum shader_time_shader_type type, fs_reg value) + int shader_time_index, int shader_time_subindex, + fs_reg value) { - int shader_time_index = - brw_get_shader_time_index(brw, shader_prog, prog, type); - fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE); + int index = shader_time_index * 3 + shader_time_subindex; + fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE); fs_reg payload; if (dispatch_width == 8) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index cffedc0c6d1..55a97228bb4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -278,7 +278,8 @@ public: void emit_shader_time_begin(); void emit_shader_time_end(); void SHADER_TIME_ADD(const brw::fs_builder &bld, - enum shader_time_shader_type type, fs_reg value); + int shader_time_index, int shader_time_subindex, + fs_reg value); void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, fs_reg dst, fs_reg offset, fs_reg src0, diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 414eab9c002..2327af77ad3 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -287,18 +287,24 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ) functions->MemoryBarrier = brw_memory_barrier; } +struct shader_times { + uint64_t time; + uint64_t written; + uint64_t reset; +}; + void brw_init_shader_time(struct brw_context *brw) { - const int max_entries = 4096; - 
brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time", - max_entries * SHADER_TIME_STRIDE, - 4096); + const int max_entries = 2048; + brw->shader_time.bo = + drm_intel_bo_alloc(brw->bufmgr, "shader time", + max_entries * SHADER_TIME_STRIDE * 3, 4096); brw->shader_time.names = rzalloc_array(brw, const char *, max_entries); brw->shader_time.ids = rzalloc_array(brw, int, max_entries); brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type, max_entries); - brw->shader_time.cumulative = rzalloc_array(brw, uint64_t, + brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times, max_entries); brw->shader_time.max_entries = max_entries; } @@ -318,27 +324,6 @@ compare_time(const void *a, const void *b) return 1; } -static void -get_written_and_reset(struct brw_context *brw, int i, - uint64_t *written, uint64_t *reset) -{ - enum shader_time_shader_type type = brw->shader_time.types[i]; - assert(type == ST_VS || type == ST_GS || type == ST_FS8 || - type == ST_FS16 || type == ST_CS); - - /* Find where we recorded written and reset. */ - int wi, ri; - - for (wi = i; brw->shader_time.types[wi] != type + 1; wi++) - ; - - for (ri = i; brw->shader_time.types[ri] != type + 2; ri++) - ; - - *written = brw->shader_time.cumulative[wi]; - *reset = brw->shader_time.cumulative[ri]; -} - static void print_shader_time_line(const char *stage, const char *name, int shader_num, uint64_t time, uint64_t total) @@ -374,26 +359,13 @@ brw_report_shader_time(struct brw_context *brw) sorted[i] = &scaled[i]; switch (type) { - case ST_VS_WRITTEN: - case ST_VS_RESET: - case ST_GS_WRITTEN: - case ST_GS_RESET: - case ST_FS8_WRITTEN: - case ST_FS8_RESET: - case ST_FS16_WRITTEN: - case ST_FS16_RESET: - case ST_CS_WRITTEN: - case ST_CS_RESET: - /* We'll handle these when along with the time. 
*/ - scaled[i] = 0; - continue; - case ST_VS: case ST_GS: case ST_FS8: case ST_FS16: case ST_CS: - get_written_and_reset(brw, i, &written, &reset); + written = brw->shader_time.cumulative[i].written; + reset = brw->shader_time.cumulative[i].reset; break; default: @@ -405,7 +377,7 @@ brw_report_shader_time(struct brw_context *brw) break; } - uint64_t time = brw->shader_time.cumulative[i]; + uint64_t time = brw->shader_time.cumulative[i].time; if (written) { scaled[i] = time / written * (written + reset); } else { @@ -491,16 +463,19 @@ brw_collect_shader_time(struct brw_context *brw) * overhead compared to the cost of tracking the time in the first place. */ drm_intel_bo_map(brw->shader_time.bo, true); - - uint32_t *times = brw->shader_time.bo->virtual; + void *bo_map = brw->shader_time.bo->virtual; for (int i = 0; i < brw->shader_time.num_entries; i++) { - brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4]; + uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE; + + brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4]; + brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4]; + brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4]; } /* Zero the BO out to clear it out for our next collection. 
*/ - memset(times, 0, brw->shader_time.bo->size); + memset(bo_map, 0, brw->shader_time.bo->size); drm_intel_bo_unmap(brw->shader_time.bo); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 82f055f229e..234ee188c27 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1676,20 +1676,21 @@ vec4_visitor::emit_shader_time_end() */ emit(ADD(diff, src_reg(diff), src_reg(-2u))); - emit_shader_time_write(st_base, src_reg(diff)); - emit_shader_time_write(st_written, src_reg(1u)); + int shader_time_index = + brw_get_shader_time_index(brw, shader_prog, prog, st_type); + + emit_shader_time_write(shader_time_index, 0, src_reg(diff)); + emit_shader_time_write(shader_time_index, 1, src_reg(1u)); emit(BRW_OPCODE_ELSE); - emit_shader_time_write(st_reset, src_reg(1u)); + emit_shader_time_write(shader_time_index, 2, src_reg(1u)); emit(BRW_OPCODE_ENDIF); } void -vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type, +vec4_visitor::emit_shader_time_write(int shader_time_index, + int shader_time_subindex, src_reg value) { - int shader_time_index = - brw_get_shader_time_index(brw, shader_prog, prog, type); - dst_reg dst = dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2)); @@ -1698,7 +1699,8 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type, time.reg_offset++; offset.type = BRW_REGISTER_TYPE_UD; - emit(MOV(offset, src_reg(shader_time_index * SHADER_TIME_STRIDE))); + int index = shader_time_index * 3 + shader_time_subindex; + emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE))); time.type = BRW_REGISTER_TYPE_UD; emit(MOV(time, src_reg(value))); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 2228d478d09..8d332af17f4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -85,9 +85,7 @@ public: gl_shader_stage stage, void *mem_ctx, bool no_spills, 
- shader_time_shader_type st_base, - shader_time_shader_type st_written, - shader_time_shader_type st_reset); + shader_time_shader_type st_type); ~vec4_visitor(); dst_reg dst_null_f() @@ -345,7 +343,7 @@ public: void emit_shader_time_begin(); void emit_shader_time_end(); - void emit_shader_time_write(enum shader_time_shader_type type, + void emit_shader_time_write(int shader_time_index, int shader_time_subindex, src_reg value); void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, @@ -413,9 +411,7 @@ private: */ const bool no_spills; - const shader_time_shader_type st_base; - const shader_time_shader_type st_written; - const shader_time_shader_type st_reset; + const shader_time_shader_type st_type; }; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index b047aa18e0a..d3754de0ca3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -41,8 +41,7 @@ vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw, bool no_spills) : vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base, &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx, - no_spills, - ST_GS, ST_GS_WRITTEN, ST_GS_RESET), + no_spills, ST_GS), c(c) { } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 669f76973ea..5ae572b4c41 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -3688,9 +3688,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, gl_shader_stage stage, void *mem_ctx, bool no_spills, - shader_time_shader_type st_base, - shader_time_shader_type st_written, - shader_time_shader_type st_reset) + shader_time_shader_type st_type) : backend_shader(brw, mem_ctx, shader_prog, prog, &prog_data->base, stage), c(c), key(key), @@ -3700,9 +3698,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, 
first_non_payload_grf(0), need_all_constants_in_pull_buffer(false), no_spills(no_spills), - st_base(st_base), - st_written(st_written), - st_reset(st_reset) + st_type(st_type) { this->failed = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 4baf73ebde1..731176afd18 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -221,7 +221,7 @@ vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw, &vs_compile->key.base, &vs_prog_data->base, prog, MESA_SHADER_VERTEX, mem_ctx, false /* no_spills */, - ST_VS, ST_VS_WRITTEN, ST_VS_RESET), + ST_VS), vs_compile(vs_compile), vs_prog_data(vs_prog_data) { diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index 2ef52e9fd6b..8a867366517 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -48,8 +48,7 @@ public: struct gl_shader_program *shader_prog) : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog, MESA_SHADER_VERTEX, NULL, - false /* no_spills */, - ST_NONE, ST_NONE, ST_NONE) + false /* no_spills */, ST_NONE) { } diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index c8c67574e95..87ebdfa6e9c 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -51,8 +51,7 @@ public: struct gl_shader_program *shader_prog) : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog, MESA_SHADER_VERTEX, NULL, - false /* no_spills */, - ST_NONE, ST_NONE, ST_NONE) + false /* no_spills */, ST_NONE) { } From 1b0f6ffa15b25e8601d60fe1ea74e893f7d33cf5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 19 Jun 2015 15:40:09 -0700 Subject: [PATCH 828/834] i965: Pull calls to 
get_shader_time_index out of the visitor Reviewed-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_cs.cpp | 8 ++- src/mesa/drivers/dri/i965/brw_fs.cpp | 55 ++++++------------- src/mesa/drivers/dri/i965/brw_fs.h | 7 ++- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ++- src/mesa/drivers/dri/i965/brw_vec4.cpp | 25 +++++---- src/mesa/drivers/dri/i965/brw_vec4.h | 7 +-- .../drivers/dri/i965/brw_vec4_gs_visitor.cpp | 18 ++++-- .../drivers/dri/i965/brw_vec4_gs_visitor.h | 3 +- .../drivers/dri/i965/brw_vec4_visitor.cpp | 4 +- .../drivers/dri/i965/brw_vec4_vs_visitor.cpp | 5 +- src/mesa/drivers/dri/i965/brw_vs.h | 3 +- src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 5 +- .../dri/i965/test_fs_cmod_propagation.cpp | 2 +- .../dri/i965/test_fs_saturate_propagation.cpp | 2 +- .../dri/i965/test_vec4_copy_propagation.cpp | 2 +- .../dri/i965/test_vec4_register_coalesce.cpp | 2 +- 16 files changed, 79 insertions(+), 76 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 0833404d5e5..fa8b5c8415d 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -88,10 +88,14 @@ brw_cs_emit(struct brw_context *brw, cfg_t *cfg = NULL; const char *fail_msg = NULL; + int st_index = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS); + /* Now the main event: Visit the shader IR and generate our CS IR for it. 
*/ fs_visitor v8(brw, mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, - &cp->Base, 8); + &cp->Base, 8, st_index); if (!v8.run_cs()) { fail_msg = v8.fail_msg; } else if (local_workgroup_size <= 8 * brw->max_cs_threads) { @@ -100,7 +104,7 @@ brw_cs_emit(struct brw_context *brw, } fs_visitor v16(brw, mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, - &cp->Base, 16); + &cp->Base, 16, st_index); if (likely(!(INTEL_DEBUG & DEBUG_NO16)) && !fail_msg && !v8.simd16_unsupported && local_workgroup_size <= 16 * brw->max_cs_threads) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 2839b9f7b89..f6c4169d0be 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -578,31 +578,6 @@ fs_visitor::emit_shader_time_begin() void fs_visitor::emit_shader_time_end() { - enum shader_time_shader_type type; - switch (stage) { - case MESA_SHADER_VERTEX: - type = ST_VS; - break; - case MESA_SHADER_GEOMETRY: - type = ST_GS; - break; - case MESA_SHADER_FRAGMENT: - if (dispatch_width == 8) { - type = ST_FS8; - } else { - assert(dispatch_width == 16); - type = ST_FS16; - } - break; - case MESA_SHADER_COMPUTE: - type = ST_CS; - break; - default: - unreachable("fs_visitor::emit_shader_time_end missing code"); - } - int shader_time_index = brw_get_shader_time_index(brw, shader_prog, prog, - type); - /* Insert our code just before the final SEND with EOT. */ exec_node *end = this->instructions.get_tail(); assert(end && ((fs_inst *) end)->eot); @@ -631,16 +606,16 @@ fs_visitor::emit_shader_time_end() * trying to determine the time taken for single instructions. 
*/ ibld.ADD(diff, diff, fs_reg(-2u)); - SHADER_TIME_ADD(ibld, shader_time_index, 0, diff); - SHADER_TIME_ADD(ibld, shader_time_index, 1, fs_reg(1u)); + SHADER_TIME_ADD(ibld, 0, diff); + SHADER_TIME_ADD(ibld, 1, fs_reg(1u)); ibld.emit(BRW_OPCODE_ELSE); - SHADER_TIME_ADD(ibld, shader_time_index, 2, fs_reg(1u)); + SHADER_TIME_ADD(ibld, 2, fs_reg(1u)); ibld.emit(BRW_OPCODE_ENDIF); } void fs_visitor::SHADER_TIME_ADD(const fs_builder &bld, - int shader_time_index, int shader_time_subindex, + int shader_time_subindex, fs_reg value) { int index = shader_time_index * 3 + shader_time_subindex; @@ -3835,7 +3810,7 @@ fs_visitor::run_vs() assign_common_binding_table_offsets(0); setup_vs_payload(); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_begin(); emit_nir_code(); @@ -3845,7 +3820,7 @@ fs_visitor::run_vs() emit_urb_writes(); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_end(); calculate_cfg(); @@ -3883,7 +3858,7 @@ fs_visitor::run_fs() } else if (brw->use_rep_send && dispatch_width == 16) { emit_repclear_shader(); } else { - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_begin(); calculate_urb_setup(); @@ -3918,7 +3893,7 @@ fs_visitor::run_fs() emit_fb_writes(); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_end(); calculate_cfg(); @@ -3962,7 +3937,7 @@ fs_visitor::run_cs() setup_cs_payload(); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_begin(); emit_nir_code(); @@ -3972,7 +3947,7 @@ fs_visitor::run_cs() emit_cs_terminate(); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_end(); calculate_cfg(); @@ -4022,10 +3997,16 @@ brw_wm_fs_emit(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_WM)) brw_dump_ir("fragment", prog, &shader->base, &fp->Base); + int st_index8 = -1, st_index16 = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) { + st_index8 
= brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8); + st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16); + } + /* Now the main event: Visit the shader IR and generate our FS IR for it. */ fs_visitor v(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, - prog, &fp->Base, 8); + prog, &fp->Base, 8, st_index8); if (!v.run_fs()) { if (prog) { prog->LinkStatus = false; @@ -4040,7 +4021,7 @@ brw_wm_fs_emit(struct brw_context *brw, cfg_t *simd16_cfg = NULL; fs_visitor v2(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, - prog, &fp->Base, 16); + prog, &fp->Base, 16, st_index16); if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) { if (!v.simd16_unsupported) { /* Try a SIMD16 compile */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 55a97228bb4..525be3a4eaf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -77,7 +77,8 @@ public: struct brw_stage_prog_data *prog_data, struct gl_shader_program *shader_prog, struct gl_program *prog, - unsigned dispatch_width); + unsigned dispatch_width, + int shader_time_index); ~fs_visitor(); @@ -278,7 +279,7 @@ public: void emit_shader_time_begin(); void emit_shader_time_end(); void SHADER_TIME_ADD(const brw::fs_builder &bld, - int shader_time_index, int shader_time_subindex, + int shader_time_subindex, fs_reg value); void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, @@ -387,6 +388,8 @@ public: const unsigned dispatch_width; /**< 8 or 16 */ + int shader_time_index; + unsigned promoted_constants; brw::fs_builder bld; }; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 3af9d78598c..bff1169c779 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1983,10 +1983,13 @@ fs_visitor::fs_visitor(struct brw_context *brw, struct brw_stage_prog_data *prog_data, struct 
gl_shader_program *shader_prog, struct gl_program *prog, - unsigned dispatch_width) + unsigned dispatch_width, + int shader_time_index) : backend_shader(brw, mem_ctx, shader_prog, prog, prog_data, stage), key(key), prog_data(prog_data), - dispatch_width(dispatch_width), promoted_constants(0), + dispatch_width(dispatch_width), + shader_time_index(shader_time_index), + promoted_constants(0), bld(fs_builder(this, dispatch_width).at_end()) { switch (stage) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 234ee188c27..093802c24d2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1676,20 +1676,15 @@ vec4_visitor::emit_shader_time_end() */ emit(ADD(diff, src_reg(diff), src_reg(-2u))); - int shader_time_index = - brw_get_shader_time_index(brw, shader_prog, prog, st_type); - - emit_shader_time_write(shader_time_index, 0, src_reg(diff)); - emit_shader_time_write(shader_time_index, 1, src_reg(1u)); + emit_shader_time_write(0, src_reg(diff)); + emit_shader_time_write(1, src_reg(1u)); emit(BRW_OPCODE_ELSE); - emit_shader_time_write(shader_time_index, 2, src_reg(1u)); + emit_shader_time_write(2, src_reg(1u)); emit(BRW_OPCODE_ENDIF); } void -vec4_visitor::emit_shader_time_write(int shader_time_index, - int shader_time_subindex, - src_reg value) +vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value) { dst_reg dst = dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2)); @@ -1715,7 +1710,7 @@ vec4_visitor::run() { sanity_param_count = prog->Parameters->NumParameters; - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_begin(); assign_binding_table_offsets(); @@ -1881,6 +1876,11 @@ brw_vs_emit(struct brw_context *brw, if (prog) shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; + int st_index = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + st_index = brw_get_shader_time_index(brw, prog, 
&c->vp->program.Base, + ST_VS); + if (unlikely(INTEL_DEBUG & DEBUG_VS)) brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base); @@ -1899,7 +1899,8 @@ brw_vs_emit(struct brw_context *brw, prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_visitor v(brw, mem_ctx, MESA_SHADER_VERTEX, &c->key, - &prog_data->base.base, prog, &c->vp->program.Base, 8); + &prog_data->base.base, prog, &c->vp->program.Base, + 8, st_index); if (!v.run_vs()) { if (prog) { prog->LinkStatus = false; @@ -1937,7 +1938,7 @@ brw_vs_emit(struct brw_context *brw, if (!assembly) { prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx); + vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx, st_index); if (!v.run()) { if (prog) { prog->LinkStatus = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 8d332af17f4..4a3ce62a12e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -85,7 +85,7 @@ public: gl_shader_stage stage, void *mem_ctx, bool no_spills, - shader_time_shader_type st_type); + int shader_time_index); ~vec4_visitor(); dst_reg dst_null_f() @@ -343,8 +343,7 @@ public: void emit_shader_time_begin(); void emit_shader_time_end(); - void emit_shader_time_write(int shader_time_index, int shader_time_subindex, - src_reg value); + void emit_shader_time_write(int shader_time_subindex, src_reg value); void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, dst_reg dst, src_reg offset, src_reg src0, @@ -411,7 +410,7 @@ private: */ const bool no_spills; - const shader_time_shader_type st_type; + int shader_time_index; }; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index d3754de0ca3..9ba96417259 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -38,10 +38,11 @@ 
vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw, struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, - bool no_spills) + bool no_spills, + int shader_time_index) : vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base, &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx, - no_spills, ST_GS), + no_spills, shader_time_index), c(c) { } @@ -648,6 +649,10 @@ brw_gs_emit(struct brw_context *brw, brw_dump_ir("geometry", prog, &shader->base, NULL); } + int st_index = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + st_index = brw_get_shader_time_index(brw, prog, NULL, ST_GS); + if (brw->gen >= 7) { /* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do * so without spilling. If the GS invocations count > 1, then we can't use @@ -657,7 +662,8 @@ brw_gs_emit(struct brw_context *brw, likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_gs_visitor v(brw, c, prog, mem_ctx, true /* no_spills */); + vec4_gs_visitor v(brw, c, prog, mem_ctx, true /* no_spills */, + st_index); if (v.run()) { return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base, mem_ctx, v.cfg, @@ -698,9 +704,11 @@ brw_gs_emit(struct brw_context *brw, const unsigned *ret = NULL; if (brw->gen >= 7) - gs = new vec4_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */); + gs = new vec4_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */, + st_index); else - gs = new gen6_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */); + gs = new gen6_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */, + st_index); if (!gs->run()) { prog->LinkStatus = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index bcb5a2bcfc1..f42311d154f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -72,7 +72,8 @@ public: struct brw_gs_compile *c, 
struct gl_shader_program *prog, void *mem_ctx, - bool no_spills); + bool no_spills, + int shader_time_index); protected: virtual dst_reg *make_reg_for_system_value(ir_variable *ir); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 5ae572b4c41..4f3fc21724b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -3688,7 +3688,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, gl_shader_stage stage, void *mem_ctx, bool no_spills, - shader_time_shader_type st_type) + int shader_time_index) : backend_shader(brw, mem_ctx, shader_prog, prog, &prog_data->base, stage), c(c), key(key), @@ -3698,7 +3698,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, first_non_payload_grf(0), need_all_constants_in_pull_buffer(false), no_spills(no_spills), - st_type(st_type) + shader_time_index(shader_time_index) { this->failed = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 731176afd18..dc1775527be 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -216,12 +216,13 @@ vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw, struct brw_vs_compile *vs_compile, struct brw_vs_prog_data *vs_prog_data, struct gl_shader_program *prog, - void *mem_ctx) + void *mem_ctx, + int shader_time_index) : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base, &vs_compile->key.base, &vs_prog_data->base, prog, MESA_SHADER_VERTEX, mem_ctx, false /* no_spills */, - ST_VS), + shader_time_index), vs_compile(vs_compile), vs_prog_data(vs_prog_data) { diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 6157ae6ffa9..6f84179c694 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -94,7 +94,8 @@ public: struct brw_vs_compile *vs_compile, 
struct brw_vs_prog_data *vs_prog_data, struct gl_shader_program *prog, - void *mem_ctx); + void *mem_ctx, + int shader_time_index); protected: virtual dst_reg *make_reg_for_system_value(ir_variable *ir); diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h index 28f23c9e4f7..863fbd08552 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h @@ -39,8 +39,9 @@ public: struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, - bool no_spills) : - vec4_gs_visitor(brw, c, prog, mem_ctx, no_spills) {} + bool no_spills, + int shader_time_index) : + vec4_gs_visitor(brw, c, prog, mem_ctx, no_spills, shader_time_index) {} protected: virtual void assign_binding_table_offsets(); diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index 7bb5c4a2fa2..f0209abeca9 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -48,7 +48,7 @@ public: struct brw_wm_prog_data *prog_data, struct gl_shader_program *shader_prog) : fs_visitor(brw, NULL, MESA_SHADER_FRAGMENT, NULL, &prog_data->base, - shader_prog, (struct gl_program *) NULL, 8) {} + shader_prog, (struct gl_program *) NULL, 8, -1) {} }; diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index d5142f57872..b8cf40eace5 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ -48,7 +48,7 @@ public: struct brw_wm_prog_data *prog_data, struct gl_shader_program *shader_prog) : fs_visitor(brw, NULL, MESA_SHADER_FRAGMENT, NULL, &prog_data->base, - shader_prog, (struct gl_program *) NULL, 8) {} + shader_prog, (struct gl_program *) NULL, 8, -1) {} }; diff --git 
a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index 8a867366517..9234b667f01 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -48,7 +48,7 @@ public: struct gl_shader_program *shader_prog) : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog, MESA_SHADER_VERTEX, NULL, - false /* no_spills */, ST_NONE) + false /* no_spills */, -1) { } diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index 87ebdfa6e9c..7d274ded2f2 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -51,7 +51,7 @@ public: struct gl_shader_program *shader_prog) : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog, MESA_SHADER_VERTEX, NULL, - false /* no_spills */, ST_NONE) + false /* no_spills */, -1) { } From 4af62c0f5cbadc762abb1bd2e59f44ca220e3f0a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 19 Jun 2015 17:25:28 -0700 Subject: [PATCH 829/834] i965/fs: Add a do_rep_send flag to run_fs Previously, we were pulling it from brw->do_rep_send Reviewed-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_fs.cpp | 9 +++++---- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f6c4169d0be..83fb5c89741 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3837,7 +3837,7 @@ fs_visitor::run_vs() } bool -fs_visitor::run_fs() +fs_visitor::run_fs(bool do_rep_send) { brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data; brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key; @@ -3855,7 +3855,8 @@ fs_visitor::run_fs() if (0) { emit_dummy_fs(); - } else if 
(brw->use_rep_send && dispatch_width == 16) { + } else if (do_rep_send) { + assert(dispatch_width == 16); emit_repclear_shader(); } else { if (shader_time_index >= 0) @@ -4007,7 +4008,7 @@ brw_wm_fs_emit(struct brw_context *brw, */ fs_visitor v(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, prog, &fp->Base, 8, st_index8); - if (!v.run_fs()) { + if (!v.run_fs(false /* do_rep_send */)) { if (prog) { prog->LinkStatus = false; ralloc_strcat(&prog->InfoLog, v.fail_msg); @@ -4026,7 +4027,7 @@ brw_wm_fs_emit(struct brw_context *brw, if (!v.simd16_unsupported) { /* Try a SIMD16 compile */ v2.import_uniforms(&v); - if (!v2.run_fs()) { + if (!v2.run_fs(brw->use_rep_send)) { perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg); } else { simd16_cfg = v2.cfg; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 525be3a4eaf..4db5a91c57b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -103,7 +103,7 @@ public: uint32_t const_offset); void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf); - bool run_fs(); + bool run_fs(bool do_rep_send); bool run_vs(); bool run_cs(); void optimize(); From 663f8d121d792edee5c012461bfd0b650011ff4a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 19 Jun 2015 17:29:42 -0700 Subject: [PATCH 830/834] i965/vs: Pass the current set of clip planes through run() and run_vs() Previously, these were pulled out of the GL context conditionally based on whether we were running ff/ARB or a GLSL program. Now, we just pass them in so that the visitor doesn't have to grab them itself. 
Reviewed-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_fs.h | 8 ++++---- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 11 +++++------ src/mesa/drivers/dri/i965/brw_vec4.cpp | 8 ++++---- src/mesa/drivers/dri/i965/brw_vec4.h | 4 ++-- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 +--- 7 files changed, 20 insertions(+), 23 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 83fb5c89741..10b43858025 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3803,7 +3803,7 @@ fs_visitor::allocate_registers() } bool -fs_visitor::run_vs() +fs_visitor::run_vs(gl_clip_plane *clip_planes) { assert(stage == MESA_SHADER_VERTEX); @@ -3818,7 +3818,7 @@ fs_visitor::run_vs() if (failed) return false; - emit_urb_writes(); + emit_urb_writes(clip_planes); if (shader_time_index >= 0) emit_shader_time_end(); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4db5a91c57b..e0a89842ddc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -84,8 +84,8 @@ public: fs_reg vgrf(const glsl_type *const type); void import_uniforms(fs_visitor *v); - void setup_uniform_clipplane_values(); - void compute_clip_distance(); + void setup_uniform_clipplane_values(gl_clip_plane *clip_planes); + void compute_clip_distance(gl_clip_plane *clip_planes); uint32_t gather_channel(int orig_chan, uint32_t sampler); void swizzle_result(ir_texture_opcode op, int dest_components, @@ -104,7 +104,7 @@ public: void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf); bool run_fs(bool do_rep_send); - bool run_vs(); + bool run_vs(gl_clip_plane *clip_planes); bool run_cs(); void optimize(); void allocate_registers(); @@ -271,7 +271,7 @@ public: fs_reg src0_alpha, unsigned components, unsigned exec_size, bool 
use_2nd_half = false); void emit_fb_writes(); - void emit_urb_writes(); + void emit_urb_writes(gl_clip_plane *clip_planes); void emit_cs_terminate(); void emit_barrier(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index bff1169c779..d441756ef7b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1715,9 +1715,8 @@ fs_visitor::emit_fb_writes() } void -fs_visitor::setup_uniform_clipplane_values() +fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes) { - gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); const struct brw_vue_prog_key *key = (const struct brw_vue_prog_key *) this->key; @@ -1731,7 +1730,7 @@ fs_visitor::setup_uniform_clipplane_values() } } -void fs_visitor::compute_clip_distance() +void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes) { struct brw_vue_prog_data *vue_prog_data = (struct brw_vue_prog_data *) prog_data; @@ -1760,7 +1759,7 @@ void fs_visitor::compute_clip_distance() if (outputs[clip_vertex].file == BAD_FILE) return; - setup_uniform_clipplane_values(); + setup_uniform_clipplane_values(clip_planes); const fs_builder abld = bld.annotate("user clip distances"); @@ -1781,7 +1780,7 @@ void fs_visitor::compute_clip_distance() } void -fs_visitor::emit_urb_writes() +fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes) { int slot, urb_offset, length; struct brw_vs_prog_data *vs_prog_data = @@ -1796,7 +1795,7 @@ fs_visitor::emit_urb_writes() /* Lower legacy ff and ClipVertex clipping to clip distances */ if (key->base.userclip_active && !prog->UsesClipDistanceOut) - compute_clip_distance(); + compute_clip_distance(clip_planes); /* If we don't have any valid slots to write, just do a minimal urb write * send to terminate the shader. 
*/ diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 093802c24d2..9c450347ba2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1706,7 +1706,7 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value) } bool -vec4_visitor::run() +vec4_visitor::run(gl_clip_plane *clip_planes) { sanity_param_count = prog->Parameters->NumParameters; @@ -1728,7 +1728,7 @@ vec4_visitor::run() base_ir = NULL; if (key->userclip_active && !prog->UsesClipDistanceOut) - setup_uniform_clipplane_values(); + setup_uniform_clipplane_values(clip_planes); emit_thread_end(); @@ -1901,7 +1901,7 @@ brw_vs_emit(struct brw_context *brw, fs_visitor v(brw, mem_ctx, MESA_SHADER_VERTEX, &c->key, &prog_data->base.base, prog, &c->vp->program.Base, 8, st_index); - if (!v.run_vs()) { + if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) { if (prog) { prog->LinkStatus = false; ralloc_strcat(&prog->InfoLog, v.fail_msg); @@ -1939,7 +1939,7 @@ brw_vs_emit(struct brw_context *brw, prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx, st_index); - if (!v.run()) { + if (!v.run(brw_select_clip_planes(&brw->ctx))) { if (prog) { prog->LinkStatus = false; ralloc_strcat(&prog->InfoLog, v.fail_msg); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 4a3ce62a12e..193b381acd0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -177,10 +177,10 @@ public: struct hash_table *variable_ht; - bool run(void); + bool run(gl_clip_plane *clip_planes); void fail(const char *msg, ...); - void setup_uniform_clipplane_values(); + void setup_uniform_clipplane_values(gl_clip_plane *clip_planes); void setup_uniform_values(ir_variable *ir); void setup_builtin_uniform_values(ir_variable *ir); int setup_uniforms(int payload_reg); diff --git 
a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 9ba96417259..d8767622365 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -664,7 +664,7 @@ brw_gs_emit(struct brw_context *brw, vec4_gs_visitor v(brw, c, prog, mem_ctx, true /* no_spills */, st_index); - if (v.run()) { + if (v.run(NULL /* clip planes */)) { return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base, mem_ctx, v.cfg, final_assembly_size); @@ -710,7 +710,7 @@ brw_gs_emit(struct brw_context *brw, gs = new gen6_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */, st_index); - if (!gs->run()) { + if (!gs->run(NULL /* clip planes */)) { prog->LinkStatus = false; ralloc_strcat(&prog->InfoLog, gs->fail_msg); } else { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 4f3fc21724b..c13669f7d6b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -720,10 +720,8 @@ vec4_visitor::setup_uniform_values(ir_variable *ir) } void -vec4_visitor::setup_uniform_clipplane_values() +vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes) { - gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); - for (int i = 0; i < key->nr_userclip_plane_consts; ++i) { assert(this->uniforms < uniform_array_size); this->uniform_vector_size[this->uniforms] = 4; From 924b15d7de2a4ae9057cdf6d5d589c9b677d3325 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Jun 2015 17:17:51 -0700 Subject: [PATCH 831/834] i965/vec4: Turn some _mesa_problem calls into asserts Reviewed-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_vec4_vp.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp index 
92d108598a2..dcbd2405078 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp @@ -381,8 +381,7 @@ vec4_vs_visitor::emit_program_code() break; default: - _mesa_problem(ctx, "Unsupported opcode %s in vertex program\n", - _mesa_opcode_string(vpi->Opcode)); + assert(!"Unsupported opcode in vertex program"); } /* Copy the temporary back into the actual destination register. */ @@ -574,15 +573,13 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src) break; default: - _mesa_problem(ctx, "bad uniform src register file: %s\n", - _mesa_register_file_name((gl_register_file)src.File)); + assert(!"Bad uniform in src register file"); return src_reg(this, glsl_type::vec4_type); } break; default: - _mesa_problem(ctx, "bad src register file: %s\n", - _mesa_register_file_name((gl_register_file)src.File)); + assert(!"Bad src register file"); return src_reg(this, glsl_type::vec4_type); } From bcaf4a3f077e3e3fbc66f264fe9124fa920ee70c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Jun 2015 17:30:23 -0700 Subject: [PATCH 832/834] i965/vec4_vs: Add an explicit use_legacy_snorm_formula flag This way we can stop doing is_gles3 checks inside of the compiler. 
Reviewed-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 +++- src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 9 +++++---- src/mesa/drivers/dri/i965/brw_vs.h | 5 ++++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 9c450347ba2..f51aa1a3a54 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -35,6 +35,7 @@ extern "C" { #include "program/prog_print.h" #include "program/prog_parameter.h" } +#include "main/context.h" #define MAX_INSTRUCTION (1 << 30) @@ -1938,7 +1939,8 @@ brw_vs_emit(struct brw_context *brw, if (!assembly) { prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx, st_index); + vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx, st_index, + !_mesa_is_gles3(&brw->ctx)); if (!v.run(brw_select_clip_planes(&brw->ctx))) { if (prog) { prog->LinkStatus = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index dc1775527be..26e3057ac78 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -23,7 +23,6 @@ #include "brw_vs.h" -#include "main/context.h" namespace brw { @@ -78,7 +77,7 @@ vec4_vs_visitor::emit_prolog() /* ES 3.0 has different rules for converting signed normalized * fixed-point numbers than desktop GL. 
*/ - if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) { + if ((wa_flags & BRW_ATTRIB_WA_SIGN) && !use_legacy_snorm_formula) { /* According to equation 2.2 of the ES 3.0 specification, * signed normalization conversion is done by: * @@ -217,14 +216,16 @@ vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw, struct brw_vs_prog_data *vs_prog_data, struct gl_shader_program *prog, void *mem_ctx, - int shader_time_index) + int shader_time_index, + bool use_legacy_snorm_formula) : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base, &vs_compile->key.base, &vs_prog_data->base, prog, MESA_SHADER_VERTEX, mem_ctx, false /* no_spills */, shader_time_index), vs_compile(vs_compile), - vs_prog_data(vs_prog_data) + vs_prog_data(vs_prog_data), + use_legacy_snorm_formula(use_legacy_snorm_formula) { } diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 6f84179c694..0511ab50528 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -95,7 +95,8 @@ public: struct brw_vs_prog_data *vs_prog_data, struct gl_shader_program *prog, void *mem_ctx, - int shader_time_index); + int shader_time_index, + bool use_legacy_snorm_formula); protected: virtual dst_reg *make_reg_for_system_value(ir_variable *ir); @@ -116,6 +117,8 @@ private: struct brw_vs_prog_data * const vs_prog_data; src_reg *vp_temp_regs; src_reg vp_addr_reg; + + bool use_legacy_snorm_formula; }; } /* namespace brw */ From 40801295d5a3d747661abb1e2ca64d44c0e3dc05 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Jun 2015 17:17:56 -0700 Subject: [PATCH 833/834] i965: Remove the brw_context from the visitors As of this commit, nothing actually needs the brw_context. 
Reviewed-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_cs.cpp | 6 +++-- src/mesa/drivers/dri/i965/brw_fs.cpp | 12 +++++----- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 1 - src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 5 +++-- src/mesa/drivers/dri/i965/brw_shader.cpp | 9 ++++---- src/mesa/drivers/dri/i965/brw_shader.h | 8 ++++--- src/mesa/drivers/dri/i965/brw_vec4.cpp | 6 +++-- src/mesa/drivers/dri/i965/brw_vec4.h | 2 +- .../drivers/dri/i965/brw_vec4_gs_visitor.cpp | 14 +++++++----- .../drivers/dri/i965/brw_vec4_gs_visitor.h | 2 +- .../dri/i965/brw_vec4_reg_allocate.cpp | 1 - .../drivers/dri/i965/brw_vec4_visitor.cpp | 5 +++-- .../drivers/dri/i965/brw_vec4_vs_visitor.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_vs.h | 2 +- src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 4 ++-- .../dri/i965/test_fs_cmod_propagation.cpp | 22 +++++++++---------- .../dri/i965/test_fs_saturate_propagation.cpp | 22 +++++++++---------- .../dri/i965/test_vec4_copy_propagation.cpp | 19 ++++++++-------- .../dri/i965/test_vec4_register_coalesce.cpp | 19 ++++++++-------- 20 files changed, 86 insertions(+), 79 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index fa8b5c8415d..4c5082c82c4 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -94,7 +94,8 @@ brw_cs_emit(struct brw_context *brw, /* Now the main event: Visit the shader IR and generate our CS IR for it. 
*/ - fs_visitor v8(brw, mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, + fs_visitor v8(brw->intelScreen->compiler, brw, + mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, &cp->Base, 8, st_index); if (!v8.run_cs()) { fail_msg = v8.fail_msg; @@ -103,7 +104,8 @@ brw_cs_emit(struct brw_context *brw, prog_data->simd_size = 8; } - fs_visitor v16(brw, mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, + fs_visitor v16(brw->intelScreen->compiler, brw, + mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, &cp->Base, 16, st_index); if (likely(!(INTEL_DEBUG & DEBUG_NO16)) && !fail_msg && !v8.simd16_unsupported && diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 10b43858025..4292aa6b9fb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -677,8 +677,7 @@ fs_visitor::no16(const char *msg) } else { simd16_unsupported = true; - struct brw_compiler *compiler = brw->intelScreen->compiler; - compiler->shader_perf_log(brw, + compiler->shader_perf_log(log_data, "SIMD16 shader failed to compile: %s", msg); } } @@ -3769,8 +3768,7 @@ fs_visitor::allocate_registers() fail("Failure to register allocate. Reduce number of " "live scalar values to avoid this."); } else { - struct brw_compiler *compiler = brw->intelScreen->compiler; - compiler->shader_perf_log(brw, + compiler->shader_perf_log(log_data, "%s shader triggered register spilling. " "Try reducing the number of live scalar " "values to improve performance.\n", @@ -4006,7 +4004,8 @@ brw_wm_fs_emit(struct brw_context *brw, /* Now the main event: Visit the shader IR and generate our FS IR for it. 
*/ - fs_visitor v(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, + fs_visitor v(brw->intelScreen->compiler, brw, + mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, prog, &fp->Base, 8, st_index8); if (!v.run_fs(false /* do_rep_send */)) { if (prog) { @@ -4021,7 +4020,8 @@ brw_wm_fs_emit(struct brw_context *brw, } cfg_t *simd16_cfg = NULL; - fs_visitor v2(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, + fs_visitor v2(brw->intelScreen->compiler, brw, + mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, prog, &fp->Base, 16, st_index16); if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) { if (!v.simd16_unsupported) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index e0a89842ddc..243baf688de 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -70,7 +70,7 @@ namespace brw { class fs_visitor : public backend_shader { public: - fs_visitor(struct brw_context *brw, + fs_visitor(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, gl_shader_stage stage, const void *key, diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index cd78816b9f2..364fc4a5ad2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -535,7 +535,6 @@ setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g, bool fs_visitor::assign_regs(bool allow_spilling) { - struct brw_compiler *compiler = brw->intelScreen->compiler; /* Most of this allocation was written for a reg_width of 1 * (dispatch_width == 8). 
In extending to SIMD16, the code was * left in place and it was converted to have the hardware diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index d441756ef7b..ea293416792 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1975,7 +1975,7 @@ fs_visitor::emit_barrier() bld.exec_all().emit(SHADER_OPCODE_BARRIER, reg_undef, payload); } -fs_visitor::fs_visitor(struct brw_context *brw, +fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, gl_shader_stage stage, const void *key, @@ -1984,7 +1984,8 @@ fs_visitor::fs_visitor(struct brw_context *brw, struct gl_program *prog, unsigned dispatch_width, int shader_time_index) - : backend_shader(brw, mem_ctx, shader_prog, prog, prog_data, stage), + : backend_shader(compiler, log_data, mem_ctx, + shader_prog, prog, prog_data, stage), key(key), prog_data(prog_data), dispatch_width(dispatch_width), shader_time_index(shader_time_index), diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 6e6a2580d1d..32c40131434 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -846,15 +846,16 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) return false; } -backend_shader::backend_shader(struct brw_context *brw, +backend_shader::backend_shader(const struct brw_compiler *compiler, + void *log_data, void *mem_ctx, struct gl_shader_program *shader_prog, struct gl_program *prog, struct brw_stage_prog_data *stage_prog_data, gl_shader_stage stage) - : brw(brw), - devinfo(brw->intelScreen->devinfo), - ctx(&brw->ctx), + : compiler(compiler), + log_data(log_data), + devinfo(compiler->devinfo), shader(shader_prog ? 
(struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL), shader_prog(shader_prog), diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index ac4df738009..b2c1a0b8d69 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -220,7 +220,8 @@ enum instruction_scheduler_mode { class backend_shader { protected: - backend_shader(struct brw_context *brw, + backend_shader(const struct brw_compiler *compiler, + void *log_data, void *mem_ctx, struct gl_shader_program *shader_prog, struct gl_program *prog, @@ -229,9 +230,10 @@ protected: public: - struct brw_context * const brw; + const struct brw_compiler *compiler; + void *log_data; /* Passed to compiler->*_log functions */ + const struct brw_device_info * const devinfo; - struct gl_context * const ctx; struct brw_shader * const shader; struct gl_shader_program * const shader_prog; struct gl_program * const prog; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index f51aa1a3a54..a5c686ceaaf 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1899,7 +1899,8 @@ brw_vs_emit(struct brw_context *brw, prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; - fs_visitor v(brw, mem_ctx, MESA_SHADER_VERTEX, &c->key, + fs_visitor v(brw->intelScreen->compiler, brw, + mem_ctx, MESA_SHADER_VERTEX, &c->key, &prog_data->base.base, prog, &c->vp->program.Base, 8, st_index); if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) { @@ -1939,7 +1940,8 @@ brw_vs_emit(struct brw_context *brw, if (!assembly) { prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx, st_index, + vec4_vs_visitor v(brw->intelScreen->compiler, + c, prog_data, prog, mem_ctx, st_index, !_mesa_is_gles3(&brw->ctx)); if (!v.run(brw_select_clip_planes(&brw->ctx))) { if (prog) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h index 193b381acd0..2ac16932189 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -76,7 +76,7 @@ class vec4_live_variables; class vec4_visitor : public backend_shader, public ir_visitor { public: - vec4_visitor(struct brw_context *brw, + vec4_visitor(const struct brw_compiler *compiler, struct brw_vec4_compile *c, struct gl_program *prog, const struct brw_vue_prog_key *key, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index d8767622365..69bcf5afc51 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -34,13 +34,13 @@ const unsigned MAX_GS_INPUT_VERTICES = 6; namespace brw { -vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw, +vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, bool no_spills, int shader_time_index) - : vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base, + : vec4_visitor(compiler, &c->base, &c->gp->program.Base, &c->key.base, &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx, no_spills, shader_time_index), c(c) @@ -662,8 +662,8 @@ brw_gs_emit(struct brw_context *brw, likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_gs_visitor v(brw, c, prog, mem_ctx, true /* no_spills */, - st_index); + vec4_gs_visitor v(brw->intelScreen->compiler, + c, prog, mem_ctx, true /* no_spills */, st_index); if (v.run(NULL /* clip planes */)) { return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base, mem_ctx, v.cfg, @@ -704,10 +704,12 @@ brw_gs_emit(struct brw_context *brw, const unsigned *ret = NULL; if (brw->gen >= 7) - gs = new vec4_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */, + gs = new vec4_gs_visitor(brw->intelScreen->compiler, + 
c, prog, mem_ctx, false /* no_spills */, st_index); else - gs = new gen6_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */, + gs = new gen6_gs_visitor(brw->intelScreen->compiler, + c, prog, mem_ctx, false /* no_spills */, st_index); if (!gs->run(NULL /* clip planes */)) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index f42311d154f..e693c56b58f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -68,7 +68,7 @@ namespace brw { class vec4_gs_visitor : public vec4_visitor { public: - vec4_gs_visitor(struct brw_context *brw, + vec4_gs_visitor(const struct brw_compiler *compiler, struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 5368a75bc0f..555c42e2f24 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -191,7 +191,6 @@ vec4_visitor::setup_payload_interference(struct ra_graph *g, bool vec4_visitor::reg_allocate() { - struct brw_compiler *compiler = brw->intelScreen->compiler; unsigned int hw_reg_mapping[alloc.count]; int payload_reg_count = this->first_non_payload_grf; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index c13669f7d6b..8d7a80b19eb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -3677,7 +3677,7 @@ vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg) *reg = neg_result; } -vec4_visitor::vec4_visitor(struct brw_context *brw, +vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, struct brw_vec4_compile *c, struct gl_program *prog, const struct brw_vue_prog_key *key, @@ -3687,7 +3687,8 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, void 
*mem_ctx, bool no_spills, int shader_time_index) - : backend_shader(brw, mem_ctx, shader_prog, prog, &prog_data->base, stage), + : backend_shader(compiler, NULL, mem_ctx, + shader_prog, prog, &prog_data->base, stage), c(c), key(key), prog_data(prog_data), diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 26e3057ac78..f93062b46d0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -211,14 +211,14 @@ vec4_vs_visitor::emit_thread_end() } -vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw, +vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler, struct brw_vs_compile *vs_compile, struct brw_vs_prog_data *vs_prog_data, struct gl_shader_program *prog, void *mem_ctx, int shader_time_index, bool use_legacy_snorm_formula) - : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base, + : vec4_visitor(compiler, &vs_compile->base, &vs_compile->vp->program.Base, &vs_compile->key.base, &vs_prog_data->base, prog, MESA_SHADER_VERTEX, mem_ctx, false /* no_spills */, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 0511ab50528..61f9b006a58 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -90,7 +90,7 @@ namespace brw { class vec4_vs_visitor : public vec4_visitor { public: - vec4_vs_visitor(struct brw_context *brw, + vec4_vs_visitor(const struct brw_compiler *compiler, struct brw_vs_compile *vs_compile, struct brw_vs_prog_data *vs_prog_data, struct gl_shader_program *prog, diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h index 863fbd08552..27254ebb727 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h @@ -35,13 +35,13 @@ namespace brw { class gen6_gs_visitor : public vec4_gs_visitor { public: - gen6_gs_visitor(struct 
brw_context *brw, + gen6_gs_visitor(const struct brw_compiler *comp, struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, bool no_spills, int shader_time_index) : - vec4_gs_visitor(brw, c, prog, mem_ctx, no_spills, shader_time_index) {} + vec4_gs_visitor(comp, c, prog, mem_ctx, no_spills, shader_time_index) {} protected: virtual void assign_binding_table_offsets(); diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index f0209abeca9..8010fb4f610 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -32,7 +32,7 @@ class cmod_propagation_test : public ::testing::Test { virtual void SetUp(); public: - struct brw_context *brw; + struct brw_compiler *compiler; struct brw_device_info *devinfo; struct gl_context *ctx; struct brw_wm_prog_data *prog_data; @@ -44,31 +44,31 @@ public: class cmod_propagation_fs_visitor : public fs_visitor { public: - cmod_propagation_fs_visitor(struct brw_context *brw, + cmod_propagation_fs_visitor(struct brw_compiler *compiler, struct brw_wm_prog_data *prog_data, struct gl_shader_program *shader_prog) - : fs_visitor(brw, NULL, MESA_SHADER_FRAGMENT, NULL, &prog_data->base, - shader_prog, (struct gl_program *) NULL, 8, -1) {} + : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL, + &prog_data->base, shader_prog, + (struct gl_program *) NULL, 8, -1) {} }; void cmod_propagation_test::SetUp() { - brw = (struct brw_context *)calloc(1, sizeof(*brw)); - devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); - brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); - brw->intelScreen->devinfo = devinfo; - ctx = &brw->ctx; + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo)); + compiler->devinfo = devinfo; fp = 
ralloc(NULL, struct brw_fragment_program); prog_data = ralloc(NULL, struct brw_wm_prog_data); shader_prog = ralloc(NULL, struct gl_shader_program); - v = new cmod_propagation_fs_visitor(brw, prog_data, shader_prog); + v = new cmod_propagation_fs_visitor(compiler, prog_data, shader_prog); _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0); - brw->gen = devinfo->gen = 4; + devinfo->gen = 4; } static fs_inst * diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index b8cf40eace5..3ef0cb319eb 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ -32,7 +32,7 @@ class saturate_propagation_test : public ::testing::Test { virtual void SetUp(); public: - struct brw_context *brw; + struct brw_compiler *compiler; struct brw_device_info *devinfo; struct gl_context *ctx; struct brw_wm_prog_data *prog_data; @@ -44,31 +44,31 @@ public: class saturate_propagation_fs_visitor : public fs_visitor { public: - saturate_propagation_fs_visitor(struct brw_context *brw, + saturate_propagation_fs_visitor(struct brw_compiler *compiler, struct brw_wm_prog_data *prog_data, struct gl_shader_program *shader_prog) - : fs_visitor(brw, NULL, MESA_SHADER_FRAGMENT, NULL, &prog_data->base, - shader_prog, (struct gl_program *) NULL, 8, -1) {} + : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL, + &prog_data->base, shader_prog, + (struct gl_program *) NULL, 8, -1) {} }; void saturate_propagation_test::SetUp() { - brw = (struct brw_context *)calloc(1, sizeof(*brw)); - devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); - brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); - brw->intelScreen->devinfo = devinfo; - ctx = &brw->ctx; + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct 
brw_device_info *)calloc(1, sizeof(*devinfo)); + compiler->devinfo = devinfo; fp = ralloc(NULL, struct brw_fragment_program); prog_data = ralloc(NULL, struct brw_wm_prog_data); shader_prog = ralloc(NULL, struct gl_shader_program); - v = new saturate_propagation_fs_visitor(brw, prog_data, shader_prog); + v = new saturate_propagation_fs_visitor(compiler, prog_data, shader_prog); _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0); - brw->gen = devinfo->gen = 4; + devinfo->gen = 4; } static fs_inst * diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index 9234b667f01..84e43fa75cd 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -33,7 +33,7 @@ class copy_propagation_test : public ::testing::Test { virtual void SetUp(); public: - struct brw_context *brw; + struct brw_compiler *compiler; struct brw_device_info *devinfo; struct gl_context *ctx; struct gl_shader_program *shader_prog; @@ -44,9 +44,9 @@ public: class copy_propagation_vec4_visitor : public vec4_visitor { public: - copy_propagation_vec4_visitor(struct brw_context *brw, + copy_propagation_vec4_visitor(struct brw_compiler *compiler, struct gl_shader_program *shader_prog) - : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog, + : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog, MESA_SHADER_VERTEX, NULL, false /* no_spills */, -1) { @@ -92,21 +92,20 @@ protected: void copy_propagation_test::SetUp() { - brw = (struct brw_context *)calloc(1, sizeof(*brw)); - devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); - brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); - brw->intelScreen->devinfo = devinfo; - ctx = &brw->ctx; + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct brw_device_info *)calloc(1, 
sizeof(*devinfo)); + compiler->devinfo = devinfo; vp = ralloc(NULL, struct brw_vertex_program); shader_prog = ralloc(NULL, struct gl_shader_program); - v = new copy_propagation_vec4_visitor(brw, shader_prog); + v = new copy_propagation_vec4_visitor(compiler, shader_prog); _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0); - brw->gen = devinfo->gen = 4; + devinfo->gen = 4; } static void diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index 7d274ded2f2..de2afd39cfe 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -35,7 +35,7 @@ class register_coalesce_test : public ::testing::Test { virtual void SetUp(); public: - struct brw_context *brw; + struct brw_compiler *compiler; struct brw_device_info *devinfo; struct gl_context *ctx; struct gl_shader_program *shader_prog; @@ -47,9 +47,9 @@ public: class register_coalesce_vec4_visitor : public vec4_visitor { public: - register_coalesce_vec4_visitor(struct brw_context *brw, + register_coalesce_vec4_visitor(struct brw_compiler *compiler, struct gl_shader_program *shader_prog) - : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog, + : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog, MESA_SHADER_VERTEX, NULL, false /* no_spills */, -1) { @@ -95,21 +95,20 @@ protected: void register_coalesce_test::SetUp() { - brw = (struct brw_context *)calloc(1, sizeof(*brw)); - devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); - brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); - brw->intelScreen->devinfo = devinfo; - ctx = &brw->ctx; + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo)); + compiler->devinfo = devinfo; vp = ralloc(NULL, struct brw_vertex_program); shader_prog 
= ralloc(NULL, struct gl_shader_program); - v = new register_coalesce_vec4_visitor(brw, shader_prog); + v = new register_coalesce_vec4_visitor(compiler, shader_prog); _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0); - brw->gen = devinfo->gen = 4; + devinfo->gen = 4; } static void From 6844d6b7f8398a25eff511541b187afeb1199ce0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 23 Jun 2015 15:39:42 -0700 Subject: [PATCH 834/834] i965/fs: Get rid of an unused variable in emit_barrier() Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index ea293416792..9a4bad6bcf5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1963,11 +1963,11 @@ fs_visitor::emit_barrier() fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* Clear the message payload */ - fs_inst *inst = bld.exec_all().MOV(payload, fs_reg(0u)); + bld.exec_all().MOV(payload, fs_reg(0u)); /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */ fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)); - inst = bld.exec_all().AND(component(payload, 2), r0_2, fs_reg(0x0f000000u)); + bld.exec_all().AND(component(payload, 2), r0_2, fs_reg(0x0f000000u)); /* Emit a gateway "barrier" message using the payload we set up, followed * by a wait instruction.