From 85fab1f09a2aa8e537203c350b7392c9b52ef86b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 9 Feb 2016 20:09:26 -0700 Subject: [PATCH 01/26] mesa: fix trivial comment typo in dlist.c --- src/mesa/main/dlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index fb31d2f2706..0e25efbae72 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -607,7 +607,7 @@ void mesa_print_display_list(GLuint list); /** * Allocate a gl_display_list object with an initial block of storage. - * \param count how many display list nodes/tokes to allocate + * \param count how many display list nodes/tokens to allocate */ static struct gl_display_list * make_list(GLuint name, GLuint count) From 3c432d48bfe8b3d3326c16aed191fa80a5400963 Mon Sep 17 00:00:00 2001 From: Topi Pohjolainen Date: Mon, 7 Dec 2015 21:58:33 +0200 Subject: [PATCH 02/26] i965: Use constant pointer when checking for compression Signed-off-by: Topi Pohjolainen Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/brw_surface_formats.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 55d6723eab7..5c63b8f36c6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1573,7 +1573,7 @@ void brw_upload_image_surfaces(struct brw_context *brw, /* brw_surface_formats.c */ bool brw_render_target_supported(struct brw_context *brw, struct gl_renderbuffer *rb); -bool brw_losslessly_compressible_format(struct brw_context *brw, +bool brw_losslessly_compressible_format(const struct brw_context *brw, uint32_t brw_format); uint32_t brw_depth_format(struct brw_context *brw, mesa_format format); mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo, diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c 
b/src/mesa/drivers/dri/i965/brw_surface_formats.c index b5c1a3531c2..3c0b23b4a42 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -824,7 +824,7 @@ brw_render_target_supported(struct brw_context *brw, * compression. */ bool -brw_losslessly_compressible_format(struct brw_context *brw, +brw_losslessly_compressible_format(const struct brw_context *brw, uint32_t brw_format) { const struct surface_format_info * const sinfo = From 878b2b8964c23d3be72dc28ef1a9758927f53214 Mon Sep 17 00:00:00 2001 From: Topi Pohjolainen Date: Sun, 3 Jan 2016 15:06:09 +0200 Subject: [PATCH 03/26] i965/gen8: Remove dead assertion The assertion is inside a condition mandating num_samples > 1 and therefore the first half of the constraint is always met. The second half in turn would only be applicable for single sampled case and moreover it is trying to falsely check against surface type instead of format. Subsequent patches will introduce proper support for the lossless compression and dropping this here makes the patches a little simpler. 
Signed-off-by: Topi Pohjolainen Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/gen8_surface_state.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 0df25d2557c..fc8f701a24c 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -243,12 +243,6 @@ gen8_emit_texture_surface_state(struct brw_context *brw, */ if (brw->gen >= 9 || mt->num_samples == 1) assert(mt->halign == 16); - - if (brw->gen >= 9) { - assert(mt->num_samples > 1 || - brw_losslessly_compressible_format(brw, surf_type)); - } - } uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index); From 43d23e879c797fa9b6cbbae15e101f2a3ee64751 Mon Sep 17 00:00:00 2001 From: Chris Forbes Date: Tue, 18 Nov 2014 21:49:53 +1300 Subject: [PATCH 04/26] i965/blorp: Fix hiz ops on MSAA surfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two things were broken here: - The depth/stencil surface dimensions were broken for MSAA. - Sample count was programmed incorrectly. Result was the depth resolve didn't work correctly on MSAA surfaces, and so sampling the surface later produced garbage. Fixes the new piglit test arb_texture_multisample-sample-depth, and various artifacts in 'tesseract' with msaa=4 glineardepth=0. Fixes freedesktop bug #76396. No piglit regressions were observed on Haswell. v2: Just set brw_hiz_op_params::dst.num_samples rather than adding a helper function (Ken). 
Signed-off-by: Chris Forbes v3: moved the alignment needed for hiz+msaa to brw_blorp.cpp, as suggested by Chad Versace (Alejandro Piñeiro on behalf of Chris Forbes) Signed-off-by: Alejandro Piñeiro Reviewed-by: Ben Widawsky Tested-by: Jordan Justen Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_blorp.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index 1bc6d15969f..4497eab3bf0 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -319,8 +319,14 @@ brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt, * not 8. But commit 1f112cc increased the alignment from 4 to 8, which * prevents the clobbering. */ - depth.width = ALIGN(depth.width, 8); - depth.height = ALIGN(depth.height, 4); + dst.num_samples = mt->num_samples; + if (dst.num_samples > 1) { + depth.width = ALIGN(mt->logical_width0, 8); + depth.height = ALIGN(mt->logical_height0, 4); + } else { + depth.width = ALIGN(depth.width, 8); + depth.height = ALIGN(depth.height, 4); + } x1 = depth.width; y1 = depth.height; From e86ba7844fb1acd5f2d48558d0b8bb449e785ff8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20Rosenkr=C3=A4nzer?= Date: Wed, 10 Feb 2016 17:19:46 +0100 Subject: [PATCH 05/26] freedreno/ir3: Get rid of nested functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows building Freedreno with clang Signed-off-by: Bernhard Rosenkränzer Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_ra.c | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 2ed78818e61..bcad96e8a30 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -605,21 +605,21 @@ 
ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) struct ir3_ra_block_data *bd; unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); - void def(unsigned name, struct ir3_instruction *instr) - { - /* defined on first write: */ - if (!ctx->def[name]) - ctx->def[name] = instr->ip; - ctx->use[name] = instr->ip; - BITSET_SET(bd->def, name); - } +#define def(name, instr) \ + do { \ + /* defined on first write: */ \ + if (!ctx->def[name]) \ + ctx->def[name] = instr->ip; \ + ctx->use[name] = instr->ip; \ + BITSET_SET(bd->def, name); \ + } while(0); - void use(unsigned name, struct ir3_instruction *instr) - { - ctx->use[name] = MAX2(ctx->use[name], instr->ip); - if (!BITSET_TEST(bd->def, name)) - BITSET_SET(bd->use, name); - } +#define use(name, instr) \ + do { \ + ctx->use[name] = MAX2(ctx->use[name], instr->ip); \ + if (!BITSET_TEST(bd->def, name)) \ + BITSET_SET(bd->use, name); \ + } while(0); bd = rzalloc(ctx->g, struct ir3_ra_block_data); From 5e8db898fd53b7622e21616f0ff27d985d7be758 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 9 Feb 2016 11:40:08 +0100 Subject: [PATCH 06/26] st/mesa: check ureg_create() retval in create_pbo_upload_vs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids a possible NULL dereference because ureg_create() might return a NULL pointer. Spotted by coverity. 
Signed-off-by: Samuel Pitoiset Reviewed-by: Nicolai Hähnle --- src/mesa/state_tracker/st_cb_texture.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index f2b607c3a1d..d53126a9441 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1132,6 +1132,8 @@ create_pbo_upload_vs(struct st_context *st) struct ureg_dst out_layer; ureg = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!ureg) + return NULL; in_pos = ureg_DECL_vs_input(ureg, TGSI_SEMANTIC_POSITION); From 111602e15909ee2785334db008ac17d4eee8f391 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Wed, 10 Feb 2016 09:45:46 -0500 Subject: [PATCH 07/26] winsys/radeon: better explain the num_tile_pipes fixup for TAHITI (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Clarify the relation between num_tiles_pipes and GB_TILE_MODE and the fix needed for Tahiti as suggested by Marek. Signed-off-by: Alexandre Demers Signed-off-by: Marek Olšák --- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 49c310cfdf7..73ef0519a8b 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -405,8 +405,10 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, &ws->info.num_tile_pipes); - /* The kernel returns 12 for some cards for an unknown reason. - * I thought this was supposed to be a power of two. + /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the + /* pipe config field of the GB_TILE_MODE array. Only one card (Tahiti) + /* reports a different value (12). Fix it by setting what's in the + /* GB_TILE_MODE array (8). 
*/ if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12) ws->info.num_tile_pipes = 8; From 796ee76e2eeabbeed9ab41b012363cabd5497b33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 10 Feb 2016 19:41:37 +0100 Subject: [PATCH 08/26] winsys/radeon: fix the num_tile_pipes comment to silence warnings --- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 73ef0519a8b..8151c447065 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -406,9 +406,9 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) &ws->info.num_tile_pipes); /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the - /* pipe config field of the GB_TILE_MODE array. Only one card (Tahiti) - /* reports a different value (12). Fix it by setting what's in the - /* GB_TILE_MODE array (8). + * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti) + * reports a different value (12). Fix it by setting what's in the + * GB_TILE_MODE array (8). */ if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12) ws->info.num_tile_pipes = 8; From 7aedbbacae6d3ec3d06735fff2eb662964773ad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 6 Feb 2016 22:09:45 +0100 Subject: [PATCH 09/26] radeonsi: put image, fmask, and sampler descriptors into one array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The texture slot is expanded to 16 dwords containing 2 descriptors. Those can be: - Image and fmask, or - Image and sampler state By carefully choosing the locations, we can put all three into one slot, with the fmask and sampler state being mutually exclusive. 
This improves shaders in 2 ways: - 2 user SGPRs are unused, shaders can use them as temporary registers now - each pair of descriptors is always on the same cache line v2: cosmetic changes: add back v8i32, don't load a sampler state & fmask at the same time Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_blit.c | 2 +- src/gallium/drivers/radeonsi/si_descriptors.c | 115 +++++++++--------- src/gallium/drivers/radeonsi/si_pipe.h | 1 - src/gallium/drivers/radeonsi/si_shader.c | 109 ++++++++++------- src/gallium/drivers/radeonsi/si_shader.h | 8 +- src/gallium/drivers/radeonsi/si_state.h | 19 +-- 6 files changed, 138 insertions(+), 116 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index a93887ec271..115877060ba 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -80,7 +80,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) if (op & SI_SAVE_TEXTURES) { util_blitter_save_fragment_sampler_states( sctx->blitter, 2, - sctx->samplers[PIPE_SHADER_FRAGMENT].states.saved_states); + sctx->samplers[PIPE_SHADER_FRAGMENT].views.sampler_states); util_blitter_save_fragment_sampler_views(sctx->blitter, 2, sctx->samplers[PIPE_SHADER_FRAGMENT].views.views); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 6c796731a18..34cc06fc078 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -41,6 +41,18 @@ * * Also, uploading descriptors to newly allocated memory doesn't require * a KCACHE flush. 
+ * + * + * Possible scenarios for one 16 dword image+sampler slot: + * + * | Image | w/ FMASK | Buffer | NULL + * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3] + * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0 + * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3] + * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3] + * + * FMASK implies MSAA, therefore no sampler state. + * Sampler states are never unbound except when FMASK is bound. */ #include "radeon/r600_cs.h" @@ -88,9 +100,9 @@ static void si_init_descriptors(struct si_descriptors *desc, desc->shader_userdata_offset = shader_userdata_index * 4; /* Initialize the array to NULL descriptors if the element size is 8. */ - if (element_dw_size == 8) - for (i = 0; i < num_elements; i++) - memcpy(desc->list + i*element_dw_size, null_descriptor, + if (element_dw_size % 8 == 0) + for (i = 0; i < num_elements * element_dw_size / 8; i++) + memcpy(desc->list + i*8, null_descriptor, sizeof(null_descriptor)); } @@ -174,27 +186,42 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); } -static void si_set_sampler_view(struct si_context *sctx, unsigned shader, - unsigned slot, struct pipe_sampler_view *view, - unsigned *view_desc) +static void si_set_sampler_view(struct si_context *sctx, + struct si_sampler_views *views, + unsigned slot, struct pipe_sampler_view *view) { - struct si_sampler_views *views = &sctx->samplers[shader].views; - if (views->views[slot] == view) return; if (view) { struct si_sampler_view *rview = (struct si_sampler_view*)view; + struct r600_texture *rtex = (struct r600_texture*)view->texture; si_sampler_view_add_buffers(sctx, rview); pipe_sampler_view_reference(&views->views[slot], view); - memcpy(views->desc.list + slot*8, view_desc, 8*4); + memcpy(views->desc.list + slot * 16, rview->state, 8*4); + + if (rtex && rtex->fmask.size) { + memcpy(views->desc.list + slot*16 + 8, + rview->fmask_state, 8*4); + 
} else { + /* Disable FMASK and bind sampler state in [12:15]. */ + memcpy(views->desc.list + slot*16 + 8, + null_descriptor, 4*4); + + if (views->sampler_states[slot]) + memcpy(views->desc.list + slot*16 + 12, + views->sampler_states[slot], 4*4); + } + views->desc.enabled_mask |= 1llu << slot; } else { pipe_sampler_view_reference(&views->views[slot], NULL); - memcpy(views->desc.list + slot*8, null_descriptor, 8*4); + memcpy(views->desc.list + slot*16, null_descriptor, 8*4); + /* Only clear the lower dwords of FMASK. */ + memcpy(views->desc.list + slot*16 + 8, null_descriptor, 4*4); views->desc.enabled_mask &= ~(1llu << slot); } @@ -208,7 +235,6 @@ static void si_set_sampler_views(struct pipe_context *ctx, { struct si_context *sctx = (struct si_context *)ctx; struct si_textures_info *samplers = &sctx->samplers[shader]; - struct si_sampler_view **rviews = (struct si_sampler_view **)views; int i; if (!count || shader >= SI_NUM_SHADERS) @@ -220,13 +246,11 @@ static void si_set_sampler_views(struct pipe_context *ctx, if (!views || !views[i]) { samplers->depth_texture_mask &= ~(1 << slot); samplers->compressed_colortex_mask &= ~(1 << slot); - si_set_sampler_view(sctx, shader, slot, NULL, NULL); - si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot, - NULL, NULL); + si_set_sampler_view(sctx, &samplers->views, slot, NULL); continue; } - si_set_sampler_view(sctx, shader, slot, views[i], rviews[i]->state); + si_set_sampler_view(sctx, &samplers->views, slot, views[i]); if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) { struct r600_texture *rtex = @@ -243,60 +267,46 @@ static void si_set_sampler_views(struct pipe_context *ctx, } else { samplers->compressed_colortex_mask &= ~(1 << slot); } - - if (rtex->fmask.size) { - si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot, - views[i], rviews[i]->fmask_state); - } else { - si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot, - NULL, NULL); - } } else { samplers->depth_texture_mask &= 
~(1 << slot); samplers->compressed_colortex_mask &= ~(1 << slot); - si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot, - NULL, NULL); } } } /* SAMPLER STATES */ -static void si_sampler_states_begin_new_cs(struct si_context *sctx, - struct si_sampler_states *states) -{ - if (!states->desc.buffer) - return; - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, states->desc.buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); -} - static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, unsigned start, unsigned count, void **states) { struct si_context *sctx = (struct si_context *)ctx; - struct si_sampler_states *samplers = &sctx->samplers[shader].states; + struct si_textures_info *samplers = &sctx->samplers[shader]; + struct si_descriptors *desc = &samplers->views.desc; struct si_sampler_state **sstates = (struct si_sampler_state**)states; int i; if (!count || shader >= SI_NUM_SHADERS) return; - if (start == 0) - samplers->saved_states[0] = states[0]; - if (start == 1) - samplers->saved_states[1] = states[0]; - else if (start == 0 && count >= 2) - samplers->saved_states[1] = states[1]; - for (i = 0; i < count; i++) { unsigned slot = start + i; - if (!sstates[i]) + if (!sstates[i] || + sstates[i] == samplers->views.sampler_states[slot]) continue; - memcpy(samplers->desc.list + slot*4, sstates[i]->val, 4*4); - samplers->desc.list_dirty = true; + samplers->views.sampler_states[slot] = sstates[i]; + + /* If FMASK is bound, don't overwrite it. + * The sampler state will be set after FMASK is unbound. 
+ */ + if (samplers->views.views[i] && + samplers->views.views[i]->texture && + ((struct r600_texture*)samplers->views.views[i]->texture)->fmask.size) + continue; + + memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4); + desc->list_dirty = true; } } @@ -862,7 +872,9 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource while (mask) { unsigned i = u_bit_scan64(&mask); if (views->views[i]->texture == buf) { - si_desc_reset_buffer_offset(ctx, views->desc.list + i*8+4, + si_desc_reset_buffer_offset(ctx, + views->desc.list + + i * 16 + 4, old_va, buf); views->desc.list_dirty = true; @@ -882,7 +894,6 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx, sctx->const_buffers[shader].desc.pointer_dirty = true; sctx->rw_buffers[shader].desc.pointer_dirty = true; sctx->samplers[shader].views.desc.pointer_dirty = true; - sctx->samplers[shader].states.desc.pointer_dirty = true; if (shader == PIPE_SHADER_VERTEX) sctx->vertex_buffers.pointer_dirty = true; @@ -1003,7 +1014,6 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom) si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false); si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false); - si_emit_shader_pointer(sctx, &sctx->samplers[i].states.desc, base, false); } si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false); } @@ -1023,9 +1033,7 @@ void si_init_all_descriptors(struct si_context *sctx) RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT); si_init_descriptors(&sctx->samplers[i].views.desc, - SI_SGPR_SAMPLER_VIEWS, 8, SI_NUM_SAMPLER_VIEWS); - si_init_descriptors(&sctx->samplers[i].states.desc, - SI_SGPR_SAMPLER_STATES, 4, SI_NUM_SAMPLER_STATES); + SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS); } si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS, @@ -1056,8 +1064,7 @@ bool si_upload_shader_descriptors(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { if 
(!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) || !si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) || - !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) || - !si_upload_descriptors(sctx, &sctx->samplers[i].states.desc)) + !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc)) return false; } return si_upload_vertex_buffer_descriptors(sctx); @@ -1071,7 +1078,6 @@ void si_release_all_descriptors(struct si_context *sctx) si_release_buffer_resources(&sctx->const_buffers[i]); si_release_buffer_resources(&sctx->rw_buffers[i]); si_release_sampler_views(&sctx->samplers[i].views); - si_release_descriptors(&sctx->samplers[i].states.desc); } si_release_descriptors(&sctx->vertex_buffers); } @@ -1084,7 +1090,6 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx) si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]); si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]); si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views); - si_sampler_states_begin_new_cs(sctx, &sctx->samplers[i].states); } si_vertex_buffers_begin_new_cs(sctx); si_shader_userdata_begin_new_cs(sctx); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 3c963db5078..b5790d6b564 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -113,7 +113,6 @@ struct si_cs_shader_state { struct si_textures_info { struct si_sampler_views views; - struct si_sampler_states states; uint32_t depth_texture_mask; /* which textures are depth */ uint32_t compressed_colortex_mask; }; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c1d3edc7143..db922f01fb8 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -86,8 +86,9 @@ struct si_shader_context LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS]; LLVMValueRef lds; LLVMValueRef *constants[SI_NUM_CONST_BUFFERS]; - 
LLVMValueRef sampler_views[SI_NUM_SAMPLER_VIEWS]; - LLVMValueRef sampler_states[SI_NUM_SAMPLER_STATES]; + LLVMValueRef sampler_views[SI_NUM_SAMPLERS]; + LLVMValueRef sampler_states[SI_NUM_SAMPLERS]; + LLVMValueRef fmasks[SI_NUM_USER_SAMPLERS]; LLVMValueRef so_buffers[4]; LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; @@ -2480,13 +2481,58 @@ static void set_tex_fetch_args(struct gallivm_state *gallivm, static const struct lp_build_tgsi_action tex_action; +enum desc_type { + DESC_IMAGE, + DESC_FMASK, + DESC_SAMPLER +}; + +static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) +{ + return LLVMPointerType(LLVMArrayType(elem_type, num_elements), + CONST_ADDR_SPACE); +} + +/** + * Load an image view, fmask view, or sampler state descriptor. + */ +static LLVMValueRef get_sampler_desc(struct si_shader_context *si_shader_ctx, + LLVMValueRef index, enum desc_type type) +{ + struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm; + LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + SI_PARAM_SAMPLERS); + + switch (type) { + case DESC_IMAGE: + /* The image is at [0:7]. */ + index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 2, 0), ""); + break; + case DESC_FMASK: + /* The FMASK is at [8:15]. */ + index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 2, 0), ""); + index = LLVMBuildAdd(builder, index, LLVMConstInt(i32, 1, 0), ""); + break; + case DESC_SAMPLER: + /* The sampler state is at [12:15]. 
*/ + index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 4, 0), ""); + index = LLVMBuildAdd(builder, index, LLVMConstInt(i32, 3, 0), ""); + ptr = LLVMBuildPointerCast(builder, ptr, + const_array(LLVMVectorType(i32, 4), 0), ""); + break; + } + + return build_indexed_load_const(si_shader_ctx, ptr, index); +} + static void tex_fetch_ptrs( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data, LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr) { struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; const struct tgsi_full_instruction * inst = emit_data->inst; unsigned target = inst->Texture.Texture; unsigned sampler_src; @@ -2501,24 +2547,20 @@ static void tex_fetch_ptrs( ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index); - *res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS); - *res_ptr = build_indexed_load_const(si_shader_ctx, *res_ptr, ind_index); - - *samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES); - *samp_ptr = build_indexed_load_const(si_shader_ctx, *samp_ptr, ind_index); + *res_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_IMAGE); if (target == TGSI_TEXTURE_2D_MSAA || target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - ind_index = LLVMBuildAdd(gallivm->builder, ind_index, - lp_build_const_int32(gallivm, - SI_FMASK_TEX_OFFSET), ""); - *fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS); - *fmask_ptr = build_indexed_load_const(si_shader_ctx, *fmask_ptr, ind_index); + *samp_ptr = NULL; + *fmask_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_FMASK); + } else { + *samp_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_SAMPLER); + *fmask_ptr = NULL; } } else { *res_ptr = si_shader_ctx->sampler_views[sampler_index]; *samp_ptr = si_shader_ctx->sampler_states[sampler_index]; - *fmask_ptr = 
si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + sampler_index]; + *fmask_ptr = si_shader_ctx->fmasks[sampler_index]; } } @@ -3498,12 +3540,6 @@ static void create_meta_data(struct si_shader_context *si_shader_ctx) si_shader_ctx->const_md = LLVMMDNodeInContext(gallivm->context, args, 3); } -static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) -{ - return LLVMPointerType(LLVMArrayType(elem_type, num_elements), - CONST_ADDR_SPACE); -} - static void declare_streamout_params(struct si_shader_context *si_shader_ctx, struct pipe_stream_output_info *so, LLVMTypeRef *params, LLVMTypeRef i32, @@ -3530,7 +3566,7 @@ static void create_function(struct si_shader_context *si_shader_ctx) struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; struct si_shader *shader = si_shader_ctx->shader; - LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v4i32, v8i32; + LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v8i32; unsigned i, last_array_pointer, last_sgpr, num_params; i8 = LLVMInt8TypeInContext(gallivm->context); @@ -3538,15 +3574,14 @@ static void create_function(struct si_shader_context *si_shader_ctx) f32 = LLVMFloatTypeInContext(gallivm->context); v2i32 = LLVMVectorType(i32, 2); v3i32 = LLVMVectorType(i32, 3); - v4i32 = LLVMVectorType(i32, 4); v8i32 = LLVMVectorType(i32, 8); v16i8 = LLVMVectorType(i8, 16); params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, SI_NUM_RW_BUFFERS); params[SI_PARAM_CONST_BUFFERS] = const_array(v16i8, SI_NUM_CONST_BUFFERS); - params[SI_PARAM_SAMPLER_STATES] = const_array(v4i32, SI_NUM_SAMPLER_STATES); - params[SI_PARAM_SAMPLER_VIEWS] = const_array(v8i32, SI_NUM_SAMPLER_VIEWS); - last_array_pointer = SI_PARAM_SAMPLER_VIEWS; + params[SI_PARAM_SAMPLERS] = const_array(v8i32, SI_NUM_SAMPLERS); + params[SI_PARAM_UNUSED] = LLVMPointerType(i32, CONST_ADDR_SPACE); + last_array_pointer = SI_PARAM_UNUSED; switch 
(si_shader_ctx->type) { case TGSI_PROCESSOR_VERTEX: @@ -3747,34 +3782,26 @@ static void preload_samplers(struct si_shader_context *si_shader_ctx) struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; struct gallivm_state * gallivm = bld_base->base.gallivm; const struct tgsi_shader_info * info = bld_base->info; - unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1; - - LLVMValueRef res_ptr, samp_ptr; LLVMValueRef offset; if (num_samplers == 0) return; - res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS); - samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES); - /* Load the resources and samplers, we rely on the code sinking to do the rest */ for (i = 0; i < num_samplers; ++i) { /* Resource */ offset = lp_build_const_int32(gallivm, i); - si_shader_ctx->sampler_views[i] = build_indexed_load_const(si_shader_ctx, res_ptr, offset); - - /* Sampler */ - offset = lp_build_const_int32(gallivm, i); - si_shader_ctx->sampler_states[i] = build_indexed_load_const(si_shader_ctx, samp_ptr, offset); + si_shader_ctx->sampler_views[i] = + get_sampler_desc(si_shader_ctx, offset, DESC_IMAGE); /* FMASK resource */ - if (info->is_msaa_sampler[i]) { - offset = lp_build_const_int32(gallivm, SI_FMASK_TEX_OFFSET + i); - si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + i] = - build_indexed_load_const(si_shader_ctx, res_ptr, offset); - } + if (info->is_msaa_sampler[i]) + si_shader_ctx->fmasks[i] = + get_sampler_desc(si_shader_ctx, offset, DESC_FMASK); + else + si_shader_ctx->sampler_states[i] = + get_sampler_desc(si_shader_ctx, offset, DESC_SAMPLER); } } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index c42c51e0455..dc75e0330e4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -77,8 +77,8 @@ struct radeon_shader_reloc; #define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */ 
#define SI_SGPR_CONST_BUFFERS 2 -#define SI_SGPR_SAMPLER_STATES 4 -#define SI_SGPR_SAMPLER_VIEWS 6 +#define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */ +/* TODO: gap */ #define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */ #define SI_SGPR_BASE_VERTEX 10 /* VS only */ #define SI_SGPR_START_INSTANCE 11 /* VS only */ @@ -101,8 +101,8 @@ struct radeon_shader_reloc; /* LLVM function parameter indices */ #define SI_PARAM_RW_BUFFERS 0 #define SI_PARAM_CONST_BUFFERS 1 -#define SI_PARAM_SAMPLER_STATES 2 -#define SI_PARAM_SAMPLER_VIEWS 3 +#define SI_PARAM_SAMPLERS 2 +#define SI_PARAM_UNUSED 3 /* TODO: use */ /* VS only parameters */ #define SI_PARAM_VERTEX_BUFFERS 4 diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index e9a017534d1..f64c4d45f1b 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -144,18 +144,13 @@ struct si_shader_data { uint32_t sh_base[SI_NUM_SHADERS]; }; +/* User sampler views: 0..15 + * Polygon stipple tex: 16 + */ #define SI_NUM_USER_SAMPLERS 16 /* AKA OpenGL textures units per shader */ #define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS #define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1) -/* User sampler views: 0..15 - * Polygon stipple tex: 16 - * FMASK sampler views: 17..33 (no sampler states) - */ -#define SI_FMASK_TEX_OFFSET SI_NUM_SAMPLERS -#define SI_NUM_SAMPLER_VIEWS (SI_FMASK_TEX_OFFSET + SI_NUM_SAMPLERS) -#define SI_NUM_SAMPLER_STATES SI_NUM_SAMPLERS - /* User constant buffers: 0..15 * Driver state constants: 16 */ @@ -210,12 +205,8 @@ struct si_descriptors { struct si_sampler_views { struct si_descriptors desc; - struct pipe_sampler_view *views[SI_NUM_SAMPLER_VIEWS]; -}; - -struct si_sampler_states { - struct si_descriptors desc; - void *saved_states[2]; /* saved for u_blitter */ + struct pipe_sampler_view *views[SI_NUM_SAMPLERS]; + void *sampler_states[SI_NUM_SAMPLERS]; }; struct si_buffer_resources { From 
9ae42ab1ec7024814494cf65c4527c1b9ad98cd0 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 11 Nov 2015 16:30:41 -0800 Subject: [PATCH 10/26] mesa: Refactor _mesa_framebuffer_renderbuffer This function previously was only used in fbobject.c and contained a bunch of API validation. Split the function into framebuffer_renderbuffer that is static and contains the validation, and _mesa_framebuffer_renderbuffer that is suitable for calling from elsewhere in Mesa (e.g., meta). Signed-off-by: Ian Romanick Reviewed-by: Anuj Phogat --- src/mesa/main/fbobject.c | 42 +++++++++++++++++++++++++--------------- src/mesa/main/fbobject.h | 3 +-- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 2d4acb35bd6..0b0653d096d 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -3413,8 +3413,27 @@ void _mesa_framebuffer_renderbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, GLenum attachment, - struct gl_renderbuffer *rb, - const char *func) + struct gl_renderbuffer *rb) +{ + assert(!_mesa_is_winsys_fbo(fb)); + + FLUSH_VERTICES(ctx, _NEW_BUFFERS); + + assert(ctx->Driver.FramebufferRenderbuffer); + ctx->Driver.FramebufferRenderbuffer(ctx, fb, attachment, rb); + + /* Some subsequent GL commands may depend on the framebuffer's visual + * after the binding is updated. Update visual info now. + */ + _mesa_update_framebuffer_visual(ctx, fb); +} + +static void +framebuffer_renderbuffer(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLenum attachment, + struct gl_renderbuffer *rb, + const char *func) { struct gl_renderbuffer_attachment *att; @@ -3444,18 +3463,9 @@ _mesa_framebuffer_renderbuffer(struct gl_context *ctx, } } - FLUSH_VERTICES(ctx, _NEW_BUFFERS); - - assert(ctx->Driver.FramebufferRenderbuffer); - ctx->Driver.FramebufferRenderbuffer(ctx, fb, attachment, rb); - - /* Some subsequent GL commands may depend on the framebuffer's visual - * after the binding is updated. 
Update visual info now. - */ - _mesa_update_framebuffer_visual(ctx, fb); + _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); } - void GLAPIENTRY _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, GLenum renderbuffertarget, @@ -3491,8 +3501,8 @@ _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, rb = NULL; } - _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb, - "glFramebufferRenderbuffer"); + framebuffer_renderbuffer(ctx, fb, attachment, rb, + "glFramebufferRenderbuffer"); } @@ -3528,8 +3538,8 @@ _mesa_NamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment, rb = NULL; } - _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb, - "glNamedFramebufferRenderbuffer"); + framebuffer_renderbuffer(ctx, fb, attachment, rb, + "glNamedFramebufferRenderbuffer"); } diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 8dad0ff34e7..458e4409f98 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -91,8 +91,7 @@ extern void _mesa_framebuffer_renderbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, GLenum attachment, - struct gl_renderbuffer *rb, - const char *func); + struct gl_renderbuffer *rb); extern void _mesa_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb); From eb5bc62e97d8f66696dca7d5c6acb616a55a16d1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 12 Nov 2015 09:11:20 -0800 Subject: [PATCH 11/26] mesa: Refactor renderbuffer_storage to make _mesa_renderbuffer_storage Pulls the parts of renderbuffer_storage that aren't just parameter validation out into a function that can be called from other parts of Mesa (e.g., meta). 
Signed-off-by: Ian Romanick Reviewed-by: Anuj Phogat --- src/mesa/main/fbobject.c | 97 ++++++++++++++++++++++++---------------- src/mesa/main/fbobject.h | 5 +++ 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 0b0653d096d..1b9b692f001 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2008,6 +2008,63 @@ invalidate_rb(GLuint key, void *data, void *userData) /** sentinal value, see below */ #define NO_SAMPLES 1000 +void +_mesa_renderbuffer_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLsizei width, + GLsizei height, GLsizei samples) +{ + const GLenum baseFormat = _mesa_base_fbo_format(ctx, internalFormat); + + assert(baseFormat != 0); + assert(width >= 0 && width <= (GLsizei) ctx->Const.MaxRenderbufferSize); + assert(height >= 0 && height <= (GLsizei) ctx->Const.MaxRenderbufferSize); + assert(samples != NO_SAMPLES); + if (samples != 0) { + assert(samples > 0); + assert(_mesa_check_sample_count(ctx, GL_RENDERBUFFER, + internalFormat, samples) == GL_NO_ERROR); + } + + FLUSH_VERTICES(ctx, _NEW_BUFFERS); + + if (rb->InternalFormat == internalFormat && + rb->Width == (GLuint) width && + rb->Height == (GLuint) height && + rb->NumSamples == samples) { + /* no change in allocation needed */ + return; + } + + /* These MUST get set by the AllocStorage func */ + rb->Format = MESA_FORMAT_NONE; + rb->NumSamples = samples; + + /* Now allocate the storage */ + assert(rb->AllocStorage); + if (rb->AllocStorage(ctx, rb, internalFormat, width, height)) { + /* No error - check/set fields now */ + /* If rb->Format == MESA_FORMAT_NONE, the format is unsupported. 
*/ + assert(rb->Width == (GLuint) width); + assert(rb->Height == (GLuint) height); + rb->InternalFormat = internalFormat; + rb->_BaseFormat = baseFormat; + assert(rb->_BaseFormat != 0); + } + else { + /* Probably ran out of memory - clear the fields */ + rb->Width = 0; + rb->Height = 0; + rb->Format = MESA_FORMAT_NONE; + rb->InternalFormat = GL_NONE; + rb->_BaseFormat = GL_NONE; + rb->NumSamples = 0; + } + + /* Invalidate the framebuffers the renderbuffer is attached in. */ + if (rb->AttachedAnytime) { + _mesa_HashWalk(ctx->Shared->FrameBuffers, invalidate_rb, rb); + } +} /** * Helper function used by renderbuffer_storage_direct() and @@ -2067,45 +2124,7 @@ renderbuffer_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, } } - FLUSH_VERTICES(ctx, _NEW_BUFFERS); - - if (rb->InternalFormat == internalFormat && - rb->Width == (GLuint) width && - rb->Height == (GLuint) height && - rb->NumSamples == samples) { - /* no change in allocation needed */ - return; - } - - /* These MUST get set by the AllocStorage func */ - rb->Format = MESA_FORMAT_NONE; - rb->NumSamples = samples; - - /* Now allocate the storage */ - assert(rb->AllocStorage); - if (rb->AllocStorage(ctx, rb, internalFormat, width, height)) { - /* No error - check/set fields now */ - /* If rb->Format == MESA_FORMAT_NONE, the format is unsupported. */ - assert(rb->Width == (GLuint) width); - assert(rb->Height == (GLuint) height); - rb->InternalFormat = internalFormat; - rb->_BaseFormat = baseFormat; - assert(rb->_BaseFormat != 0); - } - else { - /* Probably ran out of memory - clear the fields */ - rb->Width = 0; - rb->Height = 0; - rb->Format = MESA_FORMAT_NONE; - rb->InternalFormat = GL_NONE; - rb->_BaseFormat = GL_NONE; - rb->NumSamples = 0; - } - - /* Invalidate the framebuffers the renderbuffer is attached in. 
*/ - if (rb->AttachedAnytime) { - _mesa_HashWalk(ctx->Shared->FrameBuffers, invalidate_rb, rb); - } + _mesa_renderbuffer_storage(ctx, rb, internalFormat, width, height, samples); } /** diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 458e4409f98..f9a60605ea5 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -93,6 +93,11 @@ _mesa_framebuffer_renderbuffer(struct gl_context *ctx, GLenum attachment, struct gl_renderbuffer *rb); +extern void +_mesa_renderbuffer_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLsizei width, + GLsizei height, GLsizei samples); + extern void _mesa_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb); From 1e055e9211fcb0a0e6595eeb433c580597a19dbd Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 11 Nov 2015 14:29:18 -0800 Subject: [PATCH 12/26] i965/meta: Use _mesa_CreateRenderbuffers instead of _mesa_GenRenderbuffers and _mesa_BindRenderbuffer This has the advantage that it does not pollute the global binding state. It also enables later patches that will stop calling _mesa_GenRenderbuffers / _mesa_CreateRenderbuffers which pollute the renderbuffer namespace. Signed-off-by: Ian Romanick Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_meta_updownsample.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c index f39d50a69e6..563160e599b 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c +++ b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c @@ -55,13 +55,12 @@ brw_get_rb_for_slice(struct brw_context *brw, struct gl_renderbuffer *rb; struct intel_renderbuffer *irb; - /* This turns the GenRenderbuffers name into an actual struct + /* This turns the CreateRenderbuffers name into an actual struct * intel_renderbuffer. 
*/ - _mesa_GenRenderbuffers(1, &rbo); - _mesa_BindRenderbuffer(GL_RENDERBUFFER, rbo); + _mesa_CreateRenderbuffers(1, &rbo); - rb = ctx->CurrentRenderbuffer; + rb = _mesa_lookup_renderbuffer(ctx, rbo); irb = intel_renderbuffer(rb); rb->Format = mt->format; From e273bbd60b7bf2ef19b575d2a16f47409b39ddf1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 11 Nov 2015 14:33:30 -0800 Subject: [PATCH 13/26] meta: Use _mesa_CreateRenderbuffers instead of _mesa_GenRenderbuffers and _mesa_BindRenderbuffer This has the advantage that it does not pollute the global binding state. It also enables later patches that will stop calling _mesa_GenRenderbuffers / _mesa_CreateRenderbuffers which pollute the renderbuffer namespace. Signed-off-by: Ian Romanick Reviewed-by: Anuj Phogat --- src/mesa/drivers/common/meta.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 5f2e79637c2..26867d3969d 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -3071,10 +3071,10 @@ decompress_texture_image(struct gl_context *ctx, /* Create/bind FBO/renderbuffer */ if (decompress_fbo->FBO == 0) { + _mesa_CreateRenderbuffers(1, &decompress_fbo->RBO); + _mesa_GenFramebuffers(1, &decompress_fbo->FBO); - _mesa_GenRenderbuffers(1, &decompress_fbo->RBO); _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, decompress_fbo->FBO); - _mesa_BindRenderbuffer(GL_RENDERBUFFER_EXT, decompress_fbo->RBO); _mesa_FramebufferRenderbuffer(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, @@ -3086,9 +3086,8 @@ decompress_texture_image(struct gl_context *ctx, /* alloc dest surface */ if (width > decompress_fbo->Width || height > decompress_fbo->Height) { - _mesa_BindRenderbuffer(GL_RENDERBUFFER_EXT, decompress_fbo->RBO); - _mesa_RenderbufferStorage(GL_RENDERBUFFER_EXT, rbFormat, - width, height); + _mesa_NamedRenderbufferStorage(decompress_fbo->RBO, rbFormat, + width, height); status = 
_mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); if (status != GL_FRAMEBUFFER_COMPLETE) { /* If the framebuffer isn't complete then we'll leave From ab2b6317037cbe6746a3653d37562169e30c13da Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 11 Nov 2015 14:34:11 -0800 Subject: [PATCH 14/26] meta: Don't save or restore the renderbuffer binding Nothing left in meta does anything with the RBO binding, so we don't need to save or restore it. The FBO binding is still modified. Signed-off-by: Ian Romanick Reviewed-by: Anuj Phogat --- src/mesa/drivers/common/meta.c | 6 ------ src/mesa/drivers/common/meta.h | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 26867d3969d..b42a3cf16ae 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -848,8 +848,6 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) save->DrawBufferName = ctx->DrawBuffer->Name; save->ReadBufferName = ctx->ReadBuffer->Name; - save->RenderbufferName = (ctx->CurrentRenderbuffer ? 
- ctx->CurrentRenderbuffer->Name : 0); } } @@ -1241,10 +1239,6 @@ _mesa_meta_end(struct gl_context *ctx) if (ctx->ReadBuffer->Name != save->ReadBufferName) _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, save->ReadBufferName); - if (!ctx->CurrentRenderbuffer || - ctx->CurrentRenderbuffer->Name != save->RenderbufferName) - _mesa_BindRenderbuffer(GL_RENDERBUFFER, save->RenderbufferName); - if (state & MESA_META_DRAW_BUFFERS) { _mesa_drawbuffers(ctx, ctx->DrawBuffer, ctx->Const.MaxDrawBuffers, save->ColorDrawBuffers, NULL); diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index 074f70da6cd..3ff0fdddb59 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -186,7 +186,7 @@ struct save_state GLboolean RasterDiscard; GLboolean TransformFeedbackNeedsResume; - GLuint DrawBufferName, ReadBufferName, RenderbufferName; + GLuint DrawBufferName, ReadBufferName; /** MESA_META_DRAW_BUFFERS */ GLenum ColorDrawBuffers[MAX_DRAW_BUFFERS]; From 4c6b0e017c909c07b0f328261f263b93cd605c8a Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 11 Nov 2015 15:57:25 -0800 Subject: [PATCH 15/26] i965/meta: Return struct gl_renderbuffer* from brw_get_rb_for_slice instead of GL API handle Signed-off-by: Ian Romanick Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_context.h | 7 ++++--- .../drivers/dri/i965/brw_meta_fast_clear.c | 9 +++++---- .../drivers/dri/i965/brw_meta_stencil_blit.c | 18 +++++++++-------- .../drivers/dri/i965/brw_meta_updownsample.c | 20 ++++++++++--------- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 5c63b8f36c6..6b82bea52c0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1368,9 +1368,10 @@ GLboolean brwCreateContext(gl_api api, /*====================================================================== * brw_misc_state.c */ -GLuint 
brw_get_rb_for_slice(struct brw_context *brw, - struct intel_mipmap_tree *mt, - unsigned level, unsigned layer, bool flat); +struct gl_renderbuffer *brw_get_rb_for_slice(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned level, unsigned layer, + bool flat); void brw_meta_updownsample(struct brw_context *brw, struct intel_mipmap_tree *src, diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index 735d82495d8..51dbd000b1d 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -845,7 +845,8 @@ brw_meta_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt) { struct gl_context *ctx = &brw->ctx; - GLuint fbo, rbo; + GLuint fbo; + struct gl_renderbuffer *rb; struct rect rect; brw_emit_mi_flush(brw); @@ -853,12 +854,12 @@ brw_meta_resolve_color(struct brw_context *brw, _mesa_meta_begin(ctx, MESA_META_ALL); _mesa_GenFramebuffers(1, &fbo); - rbo = brw_get_rb_for_slice(brw, mt, 0, 0, false); + rb = brw_get_rb_for_slice(brw, mt, 0, 0, false); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_RENDERBUFFER, rbo); + GL_RENDERBUFFER, rb->Name); _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); brw_fast_clear_init(brw); @@ -881,7 +882,7 @@ brw_meta_resolve_color(struct brw_context *brw, set_fast_clear_op(brw, 0); use_rectlist(brw, false); - _mesa_DeleteRenderbuffers(1, &rbo); + _mesa_DeleteRenderbuffers(1, &rb->Name); _mesa_DeleteFramebuffers(1, &fbo); _mesa_meta_end(ctx); diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c index c5f6c4f8fc8..4f3f7dbc78c 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c +++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c @@ -423,7 +423,8 @@ brw_meta_stencil_blit(struct brw_context *brw, struct gl_context *ctx = &brw->ctx; struct blit_dims dims = 
*orig_dims; struct fb_tex_blit_state blit; - GLuint prog, fbo, rbo; + GLuint prog, fbo; + struct gl_renderbuffer *rb; GLenum target; _mesa_meta_fb_tex_blit_begin(ctx, &blit); @@ -436,13 +437,13 @@ brw_meta_stencil_blit(struct brw_context *brw, _mesa_GenFramebuffers(1, &fbo); /* Force the surface to be configured for level zero. */ - rbo = brw_get_rb_for_slice(brw, dst_mt, 0, dst_layer, true); + rb = brw_get_rb_for_slice(brw, dst_mt, 0, dst_layer, true); adjust_msaa(&dims, dst_mt->num_samples); adjust_tiling(&dims, dst_mt->num_samples); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_RENDERBUFFER, rbo); + GL_RENDERBUFFER, rb->Name); _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); ctx->DrawBuffer->_Status = GL_FRAMEBUFFER_COMPLETE; @@ -474,7 +475,7 @@ error: _mesa_meta_fb_tex_blit_end(ctx, target, &blit); _mesa_meta_end(ctx); - _mesa_DeleteRenderbuffers(1, &rbo); + _mesa_DeleteRenderbuffers(1, &rb->Name); _mesa_DeleteFramebuffers(1, &fbo); } @@ -532,7 +533,8 @@ brw_meta_stencil_updownsample(struct brw_context *brw, .dst_x0 = 0, .dst_y0 = 0, .dst_x1 = dst->logical_width0, .dst_y1 = dst->logical_height0, .mirror_x = 0, .mirror_y = 0 }; - GLuint fbo, rbo; + GLuint fbo; + struct gl_renderbuffer *rb; if (dst->stencil_mt) dst = dst->stencil_mt; @@ -541,15 +543,15 @@ brw_meta_stencil_updownsample(struct brw_context *brw, _mesa_meta_begin(ctx, MESA_META_ALL); _mesa_GenFramebuffers(1, &fbo); - rbo = brw_get_rb_for_slice(brw, src, 0, 0, false); + rb = brw_get_rb_for_slice(brw, src, 0, 0, false); _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbo); _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, - GL_RENDERBUFFER, rbo); + GL_RENDERBUFFER, rb->Name); brw_meta_stencil_blit(brw, dst, 0, 0, &dims); brw_emit_mi_flush(brw); - _mesa_DeleteRenderbuffers(1, &rbo); + _mesa_DeleteRenderbuffers(1, &rb->Name); _mesa_DeleteFramebuffers(1, &fbo); } diff --git 
a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c index 563160e599b..c1631aeb0b5 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c +++ b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c @@ -45,7 +45,7 @@ * * Clobbers the current renderbuffer binding (ctx->CurrentRenderbuffer). */ -GLuint +struct gl_renderbuffer * brw_get_rb_for_slice(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned level, unsigned layer, bool flat) @@ -88,7 +88,7 @@ brw_get_rb_for_slice(struct brw_context *brw, intel_miptree_reference(&irb->mt, mt); - return rbo; + return rb; } /** @@ -100,7 +100,9 @@ brw_meta_updownsample(struct brw_context *brw, struct intel_mipmap_tree *dst_mt) { struct gl_context *ctx = &brw->ctx; - GLuint fbos[2], src_rbo, dst_rbo, src_fbo, dst_fbo; + GLuint fbos[2], src_fbo, dst_fbo; + struct gl_renderbuffer *src_rb; + struct gl_renderbuffer *dst_rb; GLenum drawbuffer; GLbitfield attachment, blit_bit; @@ -119,19 +121,19 @@ brw_meta_updownsample(struct brw_context *brw, _mesa_meta_begin(ctx, MESA_META_ALL); _mesa_GenFramebuffers(2, fbos); - src_rbo = brw_get_rb_for_slice(brw, src_mt, 0, 0, false); - dst_rbo = brw_get_rb_for_slice(brw, dst_mt, 0, 0, false); + src_rb = brw_get_rb_for_slice(brw, src_mt, 0, 0, false); + dst_rb = brw_get_rb_for_slice(brw, dst_mt, 0, 0, false); src_fbo = fbos[0]; dst_fbo = fbos[1]; _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, src_fbo); _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER, attachment, - GL_RENDERBUFFER, src_rbo); + GL_RENDERBUFFER, src_rb->Name); _mesa_ReadBuffer(drawbuffer); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_fbo); _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, attachment, - GL_RENDERBUFFER, dst_rbo); + GL_RENDERBUFFER, dst_rb->Name); _mesa_DrawBuffer(drawbuffer); _mesa_BlitFramebuffer(0, 0, @@ -140,8 +142,8 @@ brw_meta_updownsample(struct brw_context *brw, dst_mt->logical_width0, dst_mt->logical_height0, blit_bit, GL_NEAREST); - 
_mesa_DeleteRenderbuffers(1, &src_rbo); - _mesa_DeleteRenderbuffers(1, &dst_rbo); + _mesa_DeleteRenderbuffers(1, &src_rb->Name); + _mesa_DeleteRenderbuffers(1, &dst_rb->Name); _mesa_DeleteFramebuffers(2, fbos); _mesa_meta_end(ctx); From 03506c9ef1c533bafde01b793571799d3ab52bf5 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 11 Nov 2015 16:35:05 -0800 Subject: [PATCH 16/26] i965/meta: Use internal functions for renderbuffer access Signed-off-by: Ian Romanick Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 5 ++--- src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c | 8 ++++---- src/mesa/drivers/dri/i965/brw_meta_updownsample.c | 6 ++---- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index 51dbd000b1d..38a505ae10d 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -857,9 +857,8 @@ brw_meta_resolve_color(struct brw_context *brw, rb = brw_get_rb_for_slice(brw, mt, 0, 0, false); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); - _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0, - GL_RENDERBUFFER, rb->Name); + _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, GL_COLOR_ATTACHMENT0, + rb); _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); brw_fast_clear_init(brw); diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c index 4f3f7dbc78c..16412ad3c04 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c +++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c @@ -442,8 +442,8 @@ brw_meta_stencil_blit(struct brw_context *brw, adjust_tiling(&dims, dst_mt->num_samples); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); - _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_RENDERBUFFER, rb->Name); + _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, 
GL_COLOR_ATTACHMENT0, + rb); _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); ctx->DrawBuffer->_Status = GL_FRAMEBUFFER_COMPLETE; @@ -546,8 +546,8 @@ brw_meta_stencil_updownsample(struct brw_context *brw, rb = brw_get_rb_for_slice(brw, src, 0, 0, false); _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbo); - _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, - GL_RENDERBUFFER, rb->Name); + _mesa_framebuffer_renderbuffer(ctx, ctx->ReadBuffer, GL_STENCIL_ATTACHMENT, + rb); brw_meta_stencil_blit(brw, dst, 0, 0, &dims); brw_emit_mi_flush(brw); diff --git a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c index c1631aeb0b5..149f4bcc810 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c +++ b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c @@ -127,13 +127,11 @@ brw_meta_updownsample(struct brw_context *brw, dst_fbo = fbos[1]; _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, src_fbo); - _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER, attachment, - GL_RENDERBUFFER, src_rb->Name); + _mesa_framebuffer_renderbuffer(ctx, ctx->ReadBuffer, attachment, src_rb); _mesa_ReadBuffer(drawbuffer); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_fbo); - _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, attachment, - GL_RENDERBUFFER, dst_rb->Name); + _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, attachment, dst_rb); _mesa_DrawBuffer(drawbuffer); _mesa_BlitFramebuffer(0, 0, From 47a5aa4bfa06e891f88e759008ee4e7129387d7e Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 11 Nov 2015 18:05:09 -0800 Subject: [PATCH 17/26] i965/meta: Don't pollute the renderbuffer namespace tl;dr: For many types of GL object, we can *NEVER* use the Gen function. In OpenGL ES (all versions!) and OpenGL compatibility profile, applications don't have to call Gen functions. 
The GL spec is very clear about how you can mix-and-match generated names and non-generated names: you can use any name you want for a particular object type until you call the Gen function for that object type. Here's the problem scenario: - Application calls a meta function that generates a name. The first Gen will probably return 1. - Application decides to use the same name for an object of the same type without calling Gen. Many demo programs use names 1, 2, 3, etc. without calling Gen. - Application calls the meta function again, and the meta function replaces the data. The application's data is lost, and the app fails. Have fun debugging that. Signed-off-by: Ian Romanick Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92363 Reviewed-by: Anuj Phogat --- .../drivers/dri/i965/brw_meta_fast_clear.c | 3 ++- .../drivers/dri/i965/brw_meta_stencil_blit.c | 5 +++-- .../drivers/dri/i965/brw_meta_updownsample.c | 19 ++++++------------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index 38a505ae10d..b2b07e7e58e 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -36,6 +36,7 @@ #include "main/varray.h" #include "main/uniforms.h" #include "main/fbobject.h" +#include "main/renderbuffer.h" #include "main/texobj.h" #include "main/api_validate.h" @@ -881,7 +882,7 @@ brw_meta_resolve_color(struct brw_context *brw, set_fast_clear_op(brw, 0); use_rectlist(brw, false); - _mesa_DeleteRenderbuffers(1, &rb->Name); + _mesa_reference_renderbuffer(&rb, NULL); _mesa_DeleteFramebuffers(1, &fbo); _mesa_meta_end(ctx); diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c index 16412ad3c04..5cfaec673c0 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c +++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c @@ -57,6 +57,7 @@ 
#include "main/blend.h" #include "main/varray.h" #include "main/shaderapi.h" +#include "main/renderbuffer.h" #include "util/ralloc.h" #include "drivers/common/meta.h" @@ -475,7 +476,7 @@ error: _mesa_meta_fb_tex_blit_end(ctx, target, &blit); _mesa_meta_end(ctx); - _mesa_DeleteRenderbuffers(1, &rb->Name); + _mesa_reference_renderbuffer(&rb, NULL); _mesa_DeleteFramebuffers(1, &fbo); } @@ -552,6 +553,6 @@ brw_meta_stencil_updownsample(struct brw_context *brw, brw_meta_stencil_blit(brw, dst, 0, 0, &dims); brw_emit_mi_flush(brw); - _mesa_DeleteRenderbuffers(1, &rb->Name); + _mesa_reference_renderbuffer(&rb, NULL); _mesa_DeleteFramebuffers(1, &fbo); } diff --git a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c index 149f4bcc810..e90e6b1e326 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c +++ b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c @@ -29,6 +29,7 @@ #include "main/buffers.h" #include "main/enums.h" #include "main/fbobject.h" +#include "main/renderbuffer.h" #include "drivers/common/meta.h" @@ -51,18 +52,10 @@ brw_get_rb_for_slice(struct brw_context *brw, unsigned level, unsigned layer, bool flat) { struct gl_context *ctx = &brw->ctx; - GLuint rbo; - struct gl_renderbuffer *rb; - struct intel_renderbuffer *irb; - - /* This turns the CreateRenderbuffers name into an actual struct - * intel_renderbuffer. 
- */ - _mesa_CreateRenderbuffers(1, &rbo); - - rb = _mesa_lookup_renderbuffer(ctx, rbo); - irb = intel_renderbuffer(rb); + struct gl_renderbuffer *rb = ctx->Driver.NewRenderbuffer(ctx, 0xDEADBEEF); + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + rb->RefCount = 1; rb->Format = mt->format; rb->_BaseFormat = _mesa_get_format_base_format(mt->format); @@ -140,8 +133,8 @@ brw_meta_updownsample(struct brw_context *brw, dst_mt->logical_width0, dst_mt->logical_height0, blit_bit, GL_NEAREST); - _mesa_DeleteRenderbuffers(1, &src_rb->Name); - _mesa_DeleteRenderbuffers(1, &dst_rb->Name); + _mesa_reference_renderbuffer(&src_rb, NULL); + _mesa_reference_renderbuffer(&dst_rb, NULL); _mesa_DeleteFramebuffers(2, fbos); _mesa_meta_end(ctx); From 4087c17832b737951280c7bf1977b589a88ec4cb Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 12 Nov 2015 09:26:41 -0800 Subject: [PATCH 18/26] meta/decompress: Track renderbuffer using gl_renderbuffer instead of GL API object handle Signed-off-by: Ian Romanick Reviewed-by: Anuj Phogat --- src/mesa/drivers/common/meta.c | 13 +++++++++---- src/mesa/drivers/common/meta.h | 3 ++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index b42a3cf16ae..217d376281f 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -2962,7 +2962,7 @@ meta_decompress_fbo_cleanup(struct decompress_fbo_state *decompress_fbo) { if (decompress_fbo->FBO != 0) { _mesa_DeleteFramebuffers(1, &decompress_fbo->FBO); - _mesa_DeleteRenderbuffers(1, &decompress_fbo->RBO); + _mesa_DeleteRenderbuffers(1, &decompress_fbo->rb->Name); } memset(decompress_fbo, 0, sizeof(*decompress_fbo)); @@ -3065,14 +3065,19 @@ decompress_texture_image(struct gl_context *ctx, /* Create/bind FBO/renderbuffer */ if (decompress_fbo->FBO == 0) { - _mesa_CreateRenderbuffers(1, &decompress_fbo->RBO); + GLuint RBO; + + _mesa_CreateRenderbuffers(1, &RBO); + + decompress_fbo->rb = 
_mesa_lookup_renderbuffer(ctx, RBO); + assert(decompress_fbo->rb != NULL && decompress_fbo->rb->Name == RBO); _mesa_GenFramebuffers(1, &decompress_fbo->FBO); _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, decompress_fbo->FBO); _mesa_FramebufferRenderbuffer(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, - decompress_fbo->RBO); + decompress_fbo->rb->Name); } else { _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, decompress_fbo->FBO); @@ -3080,7 +3085,7 @@ decompress_texture_image(struct gl_context *ctx, /* alloc dest surface */ if (width > decompress_fbo->Width || height > decompress_fbo->Height) { - _mesa_NamedRenderbufferStorage(decompress_fbo->RBO, rbFormat, + _mesa_NamedRenderbufferStorage(decompress_fbo->rb->Name, rbFormat, width, height); status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); if (status != GL_FRAMEBUFFER_COMPLETE) { diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index 3ff0fdddb59..7a120b6c44b 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -380,7 +380,8 @@ struct gen_mipmap_state */ struct decompress_fbo_state { - GLuint FBO, RBO; + struct gl_renderbuffer *rb; + GLuint FBO; GLint Width, Height; }; From 3aeff21fbfb33197538a30871dc054064dc0ff33 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 12 Nov 2015 09:29:34 -0800 Subject: [PATCH 19/26] meta: Use internal functions for renderbuffer access Signed-off-by: Ian Romanick Reviewed-by: Anuj Phogat --- src/mesa/drivers/common/meta.c | 10 ++++------ src/mesa/drivers/common/meta_copy_image.c | 12 ++++-------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 217d376281f..9fc7bc6a6b6 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -3074,10 +3074,8 @@ decompress_texture_image(struct gl_context *ctx, _mesa_GenFramebuffers(1, &decompress_fbo->FBO); _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, 
decompress_fbo->FBO); - _mesa_FramebufferRenderbuffer(GL_FRAMEBUFFER_EXT, - GL_COLOR_ATTACHMENT0_EXT, - GL_RENDERBUFFER_EXT, - decompress_fbo->rb->Name); + _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, GL_COLOR_ATTACHMENT0, + decompress_fbo->rb); } else { _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, decompress_fbo->FBO); @@ -3085,8 +3083,8 @@ decompress_texture_image(struct gl_context *ctx, /* alloc dest surface */ if (width > decompress_fbo->Width || height > decompress_fbo->Height) { - _mesa_NamedRenderbufferStorage(decompress_fbo->rb->Name, rbFormat, - width, height); + _mesa_renderbuffer_storage(ctx, decompress_fbo->rb, rbFormat, + width, height, 0); status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); if (status != GL_FRAMEBUFFER_COMPLETE) { /* If the framebuffer isn't complete then we'll leave diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c index 2c2b7ba6bf8..6534d432e61 100644 --- a/src/mesa/drivers/common/meta_copy_image.c +++ b/src/mesa/drivers/common/meta_copy_image.c @@ -241,10 +241,8 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, attachment, src_view_tex_image, src_z); } else { - _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER, - attachment, - GL_RENDERBUFFER, - src_renderbuffer->Name); + _mesa_framebuffer_renderbuffer(ctx, ctx->ReadBuffer, attachment, + src_renderbuffer); } status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER); @@ -252,10 +250,8 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, goto meta_end; if (dst_renderbuffer) { - _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, - attachment, - GL_RENDERBUFFER, - dst_renderbuffer->Name); + _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, attachment, + dst_renderbuffer); } else { _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, attachment, dst_tex_image, dst_z); From 0ecc9d907e6a76367ddf1879d3083c794337a8b5 Mon Sep 17 00:00:00 2001 From: Ian 
Romanick Date: Thu, 12 Nov 2015 09:37:27 -0800 Subject: [PATCH 20/26] meta/decompress: Don't pollute the renderbuffer namespace tl;dr: For many types of GL object, we can *NEVER* use the Gen function. In OpenGL ES (all versions!) and OpenGL compatibility profile, applications don't have to call Gen functions. The GL spec is very clear about how you can mix-and-match generated names and non-generated names: you can use any name you want for a particular object type until you call the Gen function for that object type. Here's the problem scenario: - Application calls a meta function that generates a name. The first Gen will probably return 1. - Application decides to use the same name for an object of the same type without calling Gen. Many demo programs use names 1, 2, 3, etc. without calling Gen. - Application calls the meta function again, and the meta function replaces the data. The application's data is lost, and the app fails. Have fun debugging that. Fixes piglit 'object-namespace-pollution glGetTexImage-compressed renderbuffer' test. 
Signed-off-by: Ian Romanick Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92363 Reviewed-by: Anuj Phogat --- src/mesa/drivers/common/meta.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 9fc7bc6a6b6..329e48f46f5 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -61,6 +61,7 @@ #include "main/polygon.h" #include "main/queryobj.h" #include "main/readpix.h" +#include "main/renderbuffer.h" #include "main/scissor.h" #include "main/shaderapi.h" #include "main/shaderobj.h" @@ -2962,7 +2963,7 @@ meta_decompress_fbo_cleanup(struct decompress_fbo_state *decompress_fbo) { if (decompress_fbo->FBO != 0) { _mesa_DeleteFramebuffers(1, &decompress_fbo->FBO); - _mesa_DeleteRenderbuffers(1, &decompress_fbo->rb->Name); + _mesa_reference_renderbuffer(&decompress_fbo->rb, NULL); } memset(decompress_fbo, 0, sizeof(*decompress_fbo)); @@ -3065,12 +3066,13 @@ decompress_texture_image(struct gl_context *ctx, /* Create/bind FBO/renderbuffer */ if (decompress_fbo->FBO == 0) { - GLuint RBO; + decompress_fbo->rb = ctx->Driver.NewRenderbuffer(ctx, 0xDEADBEEF); + if (decompress_fbo->rb == NULL) { + _mesa_meta_end(ctx); + return false; + } - _mesa_CreateRenderbuffers(1, &RBO); - - decompress_fbo->rb = _mesa_lookup_renderbuffer(ctx, RBO); - assert(decompress_fbo->rb != NULL && decompress_fbo->rb->Name == RBO); + decompress_fbo->rb->RefCount = 1; _mesa_GenFramebuffers(1, &decompress_fbo->FBO); _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, decompress_fbo->FBO); From d9c8a8fe6199cb6526c01e19e36e77f0e4a1b257 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 10 Feb 2016 20:14:53 +0100 Subject: [PATCH 21/26] r300g: silence warnings --- src/gallium/drivers/r300/r300_transfer.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 
842e70a6899..9d00f4d9373 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -246,9 +246,7 @@ r300_texture_transfer_map(struct pipe_context *ctx, void r300_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer) { - struct radeon_winsys *rws = r300_context(ctx)->rws; struct r300_transfer *trans = r300_transfer(transfer); - struct r300_resource *tex = r300_resource(transfer->resource); if (trans->linear_texture) { if (transfer->usage & PIPE_TRANSFER_WRITE) { From 79d0082c6442d4c1cade1c4a4904c349f8511f4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 10 Feb 2016 20:15:07 +0100 Subject: [PATCH 22/26] radeon/uvd: silence a warning --- src/gallium/drivers/radeon/radeon_uvd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 1f5a16aaca6..367aabc7a18 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -402,6 +402,9 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_ result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; switch (dec->base.chroma_format) { + case PIPE_VIDEO_CHROMA_FORMAT_NONE: + /* TODO: assert? */ + break; case PIPE_VIDEO_CHROMA_FORMAT_400: result.chroma_format = 0; break; From 088280e022bca10c3712dfa6f8e0ff343a37ce77 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sat, 6 Feb 2016 18:11:21 -0800 Subject: [PATCH 23/26] i965: Make sure we blit a full compressed block This fixes an assertion failure in [at least] one of the Unreal Engine Linux demo/games that uses DXT1 compression. Specifically, the "Vehicle Game". At some point, the game ends up trying to blit mip level whose size is 2x2, which is smaller than a DXT1 block. As a result, the assertion in the blit path is triggered. 
It should be safe to simply make sure we align the width and height, which is sadly an example of compression being less efficient. NOTE: The demo seems to work fine without the assert, and therefore release builds of mesa wouldn't stumble over this. Perhaps there is some unnoticeable corruption, but I had trouble spotting it. Thanks to Jason for looking at my backtrace and figuring out what was going on. v2: Use NPOT alignment to make sure ASTC is handled properly (Ilia) Remove comment about how this doesn't fix other bugs, because it does. Cc: "11.0 11.1" Tested-by: Matt Turner Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/intel_copy_image.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c index 0a3337ee285..dbbac1c95a2 100644 --- a/src/mesa/drivers/dri/i965/intel_copy_image.c +++ b/src/mesa/drivers/dri/i965/intel_copy_image.c @@ -212,6 +212,7 @@ intel_copy_image_sub_data(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); struct intel_mipmap_tree *src_mt, *dst_mt; unsigned src_level, dst_level; + GLuint bw, bh; if (_mesa_meta_CopyImageSubData_uncompressed(ctx, src_image, src_renderbuffer, @@ -275,6 +276,19 @@ intel_copy_image_sub_data(struct gl_context *ctx, intel_miptree_all_slices_resolve_depth(brw, dst_mt); intel_miptree_resolve_color(brw, dst_mt); + _mesa_get_format_block_size(src_mt->format, &bw, &bh); + + /* It's legal to have a WxH that's smaller than a compressed block. This + * happens for example when you are using a higher level LOD. For this case, + * we still want to copy the entire block, or else the decompression will be + * incorrect. 
+ */ + if (src_width < bw) + src_width = ALIGN_NPOT(src_width, bw); + + if (src_height < bh) + src_height = ALIGN_NPOT(src_height, bh); + if (copy_image_with_blitter(brw, src_mt, src_level, src_x, src_y, src_z, dst_mt, dst_level, From 6ee1c386fe8c9b45746e5bbb8a6f9b56da45fd50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 24 Jan 2016 01:06:07 +0100 Subject: [PATCH 24/26] radeonsi: don't emit unnecessary NULL exports for unbound targets (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: remove semantic index == 0 checks add the else statement to remove shadowing of args v3: fix fbo-alphatest-nocolor regression Reviewed-by: Nicolai Hähnle (v2) --- src/gallium/drivers/radeonsi/si_shader.c | 92 +++++++++++++++++------- 1 file changed, 67 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index db922f01fb8..34b84eb81d9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2287,7 +2287,6 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, { struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; - LLVMValueRef args[9]; int i; /* Clamp color */ @@ -2309,27 +2308,46 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]); /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ - if (index == 0 && - si_shader_ctx->shader->key.ps.last_cbuf > 0) { - for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { + if (si_shader_ctx->shader->key.ps.last_cbuf > 0) { + LLVMValueRef args[8][9]; + int c, last = -1; + + /* Get the export arguments, also find out what the last one is. 
*/ + for (c = 0; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { si_llvm_init_export_args(bld_base, color, - V_008DFC_SQ_EXP_MRT + c, args); + V_008DFC_SQ_EXP_MRT + c, args[c]); + if (args[c][0] != bld_base->uint_bld.zero) + last = c; + } + + /* Emit all exports. */ + for (c = 0; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { + if (is_last && last == c) { + args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ + args[c][2] = bld_base->uint_bld.one; /* DONE bit */ + } else if (args[c][0] == bld_base->uint_bld.zero) + continue; /* unnecessary NULL export */ + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", LLVMVoidTypeInContext(base->gallivm->context), - args, 9, 0); + args[c], 9, 0); } - } + } else { + LLVMValueRef args[9]; - /* Export */ - si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index, - args); - if (is_last) { - args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ - args[2] = bld_base->uint_bld.one; /* DONE bit */ + /* Export */ + si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index, + args); + if (is_last) { + args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ + args[2] = bld_base->uint_bld.one; /* DONE bit */ + } else if (args[0] == bld_base->uint_bld.zero) + return; /* unnecessary NULL export */ + + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); } - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - LLVMVoidTypeInContext(base->gallivm->context), - args, 9, 0); } static void si_export_null(struct lp_build_tgsi_context *bld_base) @@ -2364,19 +2382,43 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) int last_color_export = -1; int i; - /* If there are no outputs, add a dummy export. */ - if (!info->num_outputs) { - si_export_null(bld_base); - return; - } - /* Determine the last export. If MRTZ is present, it's always last. 
* Otherwise, find the last color export. */ - if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) - for (i = 0; i < info->num_outputs; i++) - if (info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR) + if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) { + unsigned spi_format = shader->key.ps.spi_shader_col_format; + + /* Don't export NULL and return if alpha-test is enabled. */ + if (shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS && + shader->key.ps.alpha_func != PIPE_FUNC_NEVER && + (spi_format & 0xf) == 0) + spi_format |= V_028714_SPI_SHADER_32_AR; + + for (i = 0; i < info->num_outputs; i++) { + unsigned index = info->output_semantic_index[i]; + + if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR) + continue; + + /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ + if (shader->key.ps.last_cbuf > 0) { + /* Just set this if any of the colorbuffers are enabled. */ + if (spi_format & + ((1llu << (4 * (shader->key.ps.last_cbuf + 1))) - 1)) + last_color_export = i; + continue; + } + + if ((spi_format >> (index * 4)) & 0xf) last_color_export = i; + } + + /* If there are no outputs, export NULL. */ + if (last_color_export == -1) { + si_export_null(bld_base); + return; + } + } for (i = 0; i < info->num_outputs; i++) { unsigned semantic_name = info->output_semantic_name[i]; From 70dff4a55e767de8b9ce10f055b94ebb1f6a9755 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 10 Feb 2016 12:07:49 -0800 Subject: [PATCH 25/26] nir/lower_vec_to_movs: Better report channels handled by insert_mov This fixes two issues. First, we had a use-after-free in the case where the instruction got deleted and we tried to return mov->dest.write_mask. Second, in the case where we are doing a self-mov of a register, we delete those channels that are moved to themselves from the write-mask. This means that those channels aren't reported as being handled even though they are. 
We now stash off the write-mask before removing unneeded channels so that they still get reported as handled. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94073 Reviewed-by: Matt Turner Cc: "11.0 11.1" --- src/compiler/nir/nir_lower_vec_to_movs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_lower_vec_to_movs.c b/src/compiler/nir/nir_lower_vec_to_movs.c index 06d627900c6..f51cede3920 100644 --- a/src/compiler/nir/nir_lower_vec_to_movs.c +++ b/src/compiler/nir/nir_lower_vec_to_movs.c @@ -83,6 +83,8 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) } } + unsigned channels_handled = mov->dest.write_mask; + /* In some situations (if the vecN is involved in a phi-web), we can end * up with a mov from a register to itself. Some of those channels may end * up doing nothing and there's no reason to have them as part of the mov. @@ -103,7 +105,7 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) ralloc_free(mov); } - return mov->dest.write_mask; + return channels_handled; } static bool From 8750299a420af76cebd3067f6f603eacde06ae06 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Feb 2016 14:51:28 -0800 Subject: [PATCH 26/26] nir: Remove the const_offset from nir_tex_instr When NIR was originally drafted, there was no easy way to determine if something was constant or not. The result was that we had lots of special-casing for constant values such as this. Now that load_const instructions are SSA-only, it's really easy to find constants and this isn't really needed anymore.
Reviewed-by: Connor Abbott Reviewed-by: Rob Clark --- src/compiler/nir/glsl_to_nir.cpp | 16 ++++-------- src/compiler/nir/nir.h | 3 --- src/compiler/nir/nir_clone.c | 1 - src/compiler/nir/nir_instr_set.c | 3 --- src/compiler/nir/nir_print.c | 14 ---------- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 15 ----------- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 26 ++++++++----------- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 21 ++++++++------- 8 files changed, 27 insertions(+), 72 deletions(-) diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp index ee1a0cb9348..a23fba75010 100644 --- a/src/compiler/nir/glsl_to_nir.cpp +++ b/src/compiler/nir/glsl_to_nir.cpp @@ -1825,7 +1825,7 @@ nir_visitor::visit(ir_texture *ir) num_srcs++; if (ir->shadow_comparitor != NULL) num_srcs++; - if (ir->offset != NULL && ir->offset->as_constant() == NULL) + if (ir->offset != NULL) num_srcs++; nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); @@ -1882,16 +1882,10 @@ nir_visitor::visit(ir_texture *ir) /* we don't support multiple offsets yet */ assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar()); - ir_constant *const_offset = ir->offset->as_constant(); - if (const_offset != NULL) { - for (unsigned i = 0; i < const_offset->type->vector_elements; i++) - instr->const_offset[i] = const_offset->value.i[i]; - } else { - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->offset)); - instr->src[src_number].src_type = nir_tex_src_offset; - src_number++; - } + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->offset)); + instr->src[src_number].src_type = nir_tex_src_offset; + src_number++; } switch (ir->op) { diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 8085341b552..cccb3a41da5 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1010,9 +1010,6 @@ typedef struct { */ bool is_new_style_shadow; - /* constant offset - must be 0 if the offset source is used */ - int 
const_offset[4]; - /* gather component selector */ unsigned component : 2; diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index a666d8ee451..b6bb5fe5668 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -355,7 +355,6 @@ clone_tex(clone_state *state, const nir_tex_instr *tex) ntex->is_array = tex->is_array; ntex->is_shadow = tex->is_shadow; ntex->is_new_style_shadow = tex->is_new_style_shadow; - memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset)); ntex->component = tex->component; ntex->texture_index = tex->texture_index; diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index c3cf2579be7..159ded0e72b 100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@ -152,7 +152,6 @@ hash_tex(uint32_t hash, const nir_tex_instr *instr) hash = HASH(hash, instr->is_array); hash = HASH(hash, instr->is_shadow); hash = HASH(hash, instr->is_new_style_shadow); - hash = HASH(hash, instr->const_offset); unsigned component = instr->component; hash = HASH(hash, component); hash = HASH(hash, instr->texture_index); @@ -303,8 +302,6 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) tex1->is_array != tex2->is_array || tex1->is_shadow != tex2->is_shadow || tex1->is_new_style_shadow != tex2->is_new_style_shadow || - memcmp(tex1->const_offset, tex2->const_offset, - sizeof(tex1->const_offset)) != 0 || tex1->component != tex2->component || tex1->texture_index != tex2->texture_index || tex1->texture_array_size != tex2->texture_array_size || diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index d9d75c2e8df..8ccaf8a0edb 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -637,20 +637,6 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) fprintf(fp, ", "); } - bool has_nonzero_offset = false; - for (unsigned i = 0; i < 4; i++) { - if (instr->const_offset[i] != 0) { - 
has_nonzero_offset = true; - break; - } - } - - if (has_nonzero_offset) { - fprintf(fp, "[%i %i %i %i] (offset), ", - instr->const_offset[0], instr->const_offset[1], - instr->const_offset[2], instr->const_offset[3]); - } - if (instr->op == nir_texop_tg4) { fprintf(fp, "%u (gather_component), ", instr->component); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 63dd1700f9c..ffa75775505 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1434,21 +1434,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) tex_info(tex, &flags, &coords); - if (!has_off) { - /* could still have a constant offset: */ - if (tex->const_offset[0] || tex->const_offset[1] || - tex->const_offset[2] || tex->const_offset[3]) { - off = const_off; - - off[0] = create_immed(b, tex->const_offset[0]); - off[1] = create_immed(b, tex->const_offset[1]); - off[2] = create_immed(b, tex->const_offset[2]); - off[3] = create_immed(b, tex->const_offset[3]); - - has_off = true; - } - } - /* scale up integer coords for TXF based on the LOD */ if (ctx->unminify_coords && (opc == OPC_ISAML)) { assert(has_lod); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index ade5b46928d..1fc21e474a7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2951,7 +2951,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) instr->is_array; int lod_components = 0; - int UNUSED offset_components = 0; fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset; @@ -2999,13 +2998,18 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) case nir_tex_src_ms_index: sample_index = retype(src, BRW_REGISTER_TYPE_UD); break; - case nir_tex_src_offset: - tex_offset = retype(src, BRW_REGISTER_TYPE_D); - if 
(instr->is_array) - offset_components = instr->coord_components - 1; - else - offset_components = instr->coord_components; + + case nir_tex_src_offset: { + nir_const_value *const_offset = + nir_src_as_const_value(instr->src[i].src); + if (const_offset) { + tex_offset = brw_imm_ud(brw_texture_offset(const_offset->i, 3)); + } else { + tex_offset = retype(src, BRW_REGISTER_TYPE_D); + } break; + } + case nir_tex_src_projector: unreachable("should be lowered"); @@ -3049,14 +3053,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) } } - for (unsigned i = 0; i < 3; i++) { - if (instr->const_offset[i] != 0) { - assert(offset_components == 0); - tex_offset = brw_imm_ud(brw_texture_offset(instr->const_offset, 3)); - break; - } - } - enum glsl_base_type dest_base_type = brw_glsl_base_type_for_nir_type (instr->dest_type); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index ca6a9def7ad..74ec4f0e87f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1657,6 +1657,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) dst_reg dest = get_nir_dest(instr->dest, instr->dest_type); /* Load the texture operation sources */ + uint32_t constant_offset = 0; for (unsigned i = 0; i < instr->num_srcs; i++) { switch (instr->src[i].src_type) { case nir_tex_src_comparitor: @@ -1713,9 +1714,17 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) break; } - case nir_tex_src_offset: - offset_value = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2); + case nir_tex_src_offset: { + nir_const_value *const_offset = + nir_src_as_const_value(instr->src[i].src); + if (const_offset) { + constant_offset = brw_texture_offset(const_offset->i, 3); + } else { + offset_value = + get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2); + } break; + } case nir_tex_src_texture_offset: { /* The highest texture which may be used by this operation is @@ -1771,14 +1780,6 
@@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) } } - uint32_t constant_offset = 0; - for (unsigned i = 0; i < 3; i++) { - if (instr->const_offset[i] != 0) { - constant_offset = brw_texture_offset(instr->const_offset, 3); - break; - } - } - /* Stuff the channel select bits in the top of the texture offset */ if (instr->op == nir_texop_tg4) { if (instr->component == 1 &&