diff --git a/docs/GL3.txt b/docs/GL3.txt index 331b2daaeb6..561f20421db 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -196,7 +196,7 @@ GL 4.5, GLSL 4.50: GL_ARB_get_texture_sub_image DONE (all drivers) GL_ARB_shader_texture_image_samples not started GL_ARB_texture_barrier DONE (nv50, nvc0, r600, radeonsi) - GL_KHR_context_flush_control DONE (all - but needs GLX/EXT extension to be useful) + GL_KHR_context_flush_control DONE (all - but needs GLX/EGL extension to be useful) GL_KHR_robust_buffer_access_behavior not started GL_KHR_robustness 90% done (the ARB variant) GL_EXT_shader_integer_mix DONE (all drivers that support GLSL) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 278d5e9bf5b..db50734efd5 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -65,24 +65,24 @@ struct ttn_compile { nir_register *addr_reg; /** - * Stack of cf_node_lists where instructions should be pushed as we pop + * Stack of nir_cursors where instructions should be pushed as we pop * back out of the control flow stack. * * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else * instructions should be placed, and if_stack[if_stack_pos - 1] has where * the next instructions outside of the if/then/else block go. */ - struct exec_list **if_stack; + nir_cursor *if_stack; unsigned if_stack_pos; /** - * Stack of cf_node_lists where instructions should be pushed as we pop + * Stack of nir_cursors where instructions should be pushed as we pop * back out of the control flow stack. * * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside * of the loop. */ - struct exec_list **loop_stack; + nir_cursor *loop_stack; unsigned loop_stack_pos; /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */ @@ -922,7 +922,7 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint) nir_builder *b = &c->build; /* Save the outside-of-the-if-statement node list. 
*/ - c->if_stack[c->if_stack_pos] = b->cf_node_list; + c->if_stack[c->if_stack_pos] = b->cursor; c->if_stack_pos++; src = ttn_channel(b, src, X); @@ -933,11 +933,11 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint) } else { if_stmt->condition = nir_src_for_ssa(nir_fne(b, src, nir_imm_int(b, 0))); } - nir_cf_node_insert_end(b->cf_node_list, &if_stmt->cf_node); + nir_builder_cf_insert(b, &if_stmt->cf_node); - nir_builder_insert_after_cf_list(b, &if_stmt->then_list); + b->cursor = nir_after_cf_list(&if_stmt->then_list); - c->if_stack[c->if_stack_pos] = &if_stmt->else_list; + c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list); c->if_stack_pos++; } @@ -946,7 +946,7 @@ ttn_else(struct ttn_compile *c) { nir_builder *b = &c->build; - nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos - 1]); + b->cursor = c->if_stack[c->if_stack_pos - 1]; } static void @@ -955,7 +955,7 @@ ttn_endif(struct ttn_compile *c) nir_builder *b = &c->build; c->if_stack_pos -= 2; - nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos]); + b->cursor = c->if_stack[c->if_stack_pos]; } static void @@ -964,13 +964,13 @@ ttn_bgnloop(struct ttn_compile *c) nir_builder *b = &c->build; /* Save the outside-of-the-loop node list. 
*/ - c->loop_stack[c->loop_stack_pos] = b->cf_node_list; + c->loop_stack[c->loop_stack_pos] = b->cursor; c->loop_stack_pos++; nir_loop *loop = nir_loop_create(b->shader); - nir_cf_node_insert_end(b->cf_node_list, &loop->cf_node); + nir_builder_cf_insert(b, &loop->cf_node); - nir_builder_insert_after_cf_list(b, &loop->body); + b->cursor = nir_after_cf_list(&loop->body); } static void @@ -993,7 +993,7 @@ ttn_endloop(struct ttn_compile *c) nir_builder *b = &c->build; c->loop_stack_pos--; - nir_builder_insert_after_cf_list(b, c->loop_stack[c->loop_stack_pos]); + b->cursor = c->loop_stack[c->loop_stack_pos]; } static void @@ -1803,7 +1803,7 @@ tgsi_to_nir(const void *tgsi_tokens, nir_function_impl *impl = nir_function_impl_create(overload); nir_builder_init(&c->build, impl); - nir_builder_insert_after_cf_list(&c->build, &impl->body); + c->build.cursor = nir_after_cf_list(&impl->body); s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1; s->num_uniforms = scan.const_file_max[0] + 1; @@ -1819,10 +1819,10 @@ tgsi_to_nir(const void *tgsi_tokens, c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types); - c->if_stack = rzalloc_array(c, struct exec_list *, + c->if_stack = rzalloc_array(c, nir_cursor, (scan.opcode_count[TGSI_OPCODE_IF] + scan.opcode_count[TGSI_OPCODE_UIF]) * 2); - c->loop_stack = rzalloc_array(c, struct exec_list *, + c->loop_stack = rzalloc_array(c, nir_cursor, scan.opcode_count[TGSI_OPCODE_BGNLOOP]); ret = tgsi_parse_init(&parser, tgsi_tokens); diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index becdb029f13..eab48c5f00d 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -372,30 +372,28 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter, * * States not listed here are not affected by util_blitter. 
*/ -static inline -void util_blitter_save_blend(struct blitter_context *blitter, - void *state) +static inline void +util_blitter_save_blend(struct blitter_context *blitter, void *state) { blitter->saved_blend_state = state; } -static inline -void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter, - void *state) +static inline void +util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter, + void *state) { blitter->saved_dsa_state = state; } -static inline -void util_blitter_save_vertex_elements(struct blitter_context *blitter, - void *state) +static inline void +util_blitter_save_vertex_elements(struct blitter_context *blitter, void *state) { blitter->saved_velem_state = state; } -static inline -void util_blitter_save_stencil_ref(struct blitter_context *blitter, - const struct pipe_stencil_ref *state) +static inline void +util_blitter_save_stencil_ref(struct blitter_context *blitter, + const struct pipe_stencil_ref *state) { blitter->saved_stencil_ref = *state; } @@ -407,23 +405,20 @@ void util_blitter_save_rasterizer(struct blitter_context *blitter, blitter->saved_rs_state = state; } -static inline -void util_blitter_save_fragment_shader(struct blitter_context *blitter, - void *fs) +static inline void +util_blitter_save_fragment_shader(struct blitter_context *blitter, void *fs) { blitter->saved_fs = fs; } -static inline -void util_blitter_save_vertex_shader(struct blitter_context *blitter, - void *vs) +static inline void +util_blitter_save_vertex_shader(struct blitter_context *blitter, void *vs) { blitter->saved_vs = vs; } -static inline -void util_blitter_save_geometry_shader(struct blitter_context *blitter, - void *gs) +static inline void +util_blitter_save_geometry_shader(struct blitter_context *blitter, void *gs) { blitter->saved_gs = gs; } @@ -442,24 +437,24 @@ util_blitter_save_tesseval_shader(struct blitter_context *blitter, blitter->saved_tes = sh; } -static inline -void util_blitter_save_framebuffer(struct blitter_context 
*blitter, - const struct pipe_framebuffer_state *state) +static inline void +util_blitter_save_framebuffer(struct blitter_context *blitter, + const struct pipe_framebuffer_state *state) { blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */ util_copy_framebuffer_state(&blitter->saved_fb_state, state); } -static inline -void util_blitter_save_viewport(struct blitter_context *blitter, - struct pipe_viewport_state *state) +static inline void +util_blitter_save_viewport(struct blitter_context *blitter, + struct pipe_viewport_state *state) { blitter->saved_viewport = *state; } -static inline -void util_blitter_save_scissor(struct blitter_context *blitter, - struct pipe_scissor_state *state) +static inline void +util_blitter_save_scissor(struct blitter_context *blitter, + struct pipe_scissor_state *state) { blitter->saved_scissor = *state; } diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index b4503deb8f6..5fe9e33e208 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -41,6 +41,7 @@ #include "util/u_tile.h" #include "util/u_prim.h" #include "util/u_surface.h" +#include <inttypes.h> #include <stdio.h> #include <limits.h> /* CHAR_BIT */ @@ -275,7 +276,7 @@ debug_get_flags_option(const char *name, for (; flags->name; ++flags) namealign = MAX2(namealign, strlen(flags->name)); for (flags = orig; flags->name; ++flags) - _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name, + _debug_printf("| %*s [0x%0*"PRIx64"]%s%s\n", namealign, flags->name, (int)sizeof(uint64_t)*CHAR_BIT/4, flags->value, flags->desc ? " " : "", flags->desc ? 
flags->desc : ""); } @@ -290,9 +291,9 @@ debug_get_flags_option(const char *name, if (debug_get_option_should_print()) { if (str) { - debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str); + debug_printf("%s: %s = 0x%"PRIx64" (%s)\n", __FUNCTION__, name, result, str); } else { - debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + debug_printf("%s: %s = 0x%"PRIx64"\n", __FUNCTION__, name, result); } } diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index 441bfec5756..a157dc33db9 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -680,6 +680,7 @@ static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 #define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 0x00080000 #define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 0x00100000 #define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 0x00200000 +#define A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 #define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD 0x00800000 #define A3XX_GRAS_CL_CLIP_CNTL_WCOORD 0x01000000 #define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE 0x02000000 diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 752e7f88cb9..6f514ed05df 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -563,10 +563,29 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE); val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD | A3XX_GRAS_CL_CLIP_CNTL_WCOORD); + /* TODO only use if prog doesn't use clipvertex/clipdist */ + val |= MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6) << 26; OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, val); } + if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) { + uint32_t planes = ctx->rasterizer->clip_plane_enable; + 
int count = 0; + + while (planes && count < 6) { + int i = ffs(planes) - 1; + + planes &= ~(1U << i); + fd_wfi(ctx, ring); + OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4); + OUT_RING(ring, fui(ctx->ucp.ucp[i][0])); + OUT_RING(ring, fui(ctx->ucp.ucp[i][1])); + OUT_RING(ring, fui(ctx->ucp.ucp[i][2])); + OUT_RING(ring, fui(ctx->ucp.ucp[i][3])); + } + } + /* NOTE: since primitive_restart is not actually part of any * state object, we need to make sure that we always emit * PRIM_VTX_CNTL.. either that or be more clever and detect diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c index 583caaa806f..260eacd301a 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c @@ -65,7 +65,8 @@ fd3_rasterizer_state_create(struct pipe_context *pctx, if (cso->multisample) TODO */ - so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */ + so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER /* ??? 
*/ | + COND(cso->clip_halfz, A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z); so->gras_su_point_minmax = A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) | A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max); diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 509a90fdf23..3486c2fd1b7 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -334,6 +334,7 @@ struct fd_context { FD_DIRTY_INDEXBUF = (1 << 16), FD_DIRTY_SCISSOR = (1 << 17), FD_DIRTY_STREAMOUT = (1 << 18), + FD_DIRTY_UCP = (1 << 19), } dirty; struct pipe_blend_state *blend; @@ -355,6 +356,7 @@ struct fd_context { struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; struct pipe_index_buffer indexbuf; struct fd_streamout_stateobj streamout; + struct pipe_clip_state ucp; /* GMEM/tile handling fxns: */ void (*emit_tile_init)(struct fd_context *ctx); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 86e9a21da2f..17dd47c71ab 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -191,6 +191,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 16383; case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_CLIP_HALFZ: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return is_a3xx(screen); @@ -228,7 +229,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SAMPLER_VIEW_TARGET: - case PIPE_CAP_CLIP_HALFZ: case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index 7bf8bdb4507..e75865a9387 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ 
b/src/gallium/drivers/freedreno/freedreno_state.c @@ -65,7 +65,9 @@ static void fd_set_clip_state(struct pipe_context *pctx, const struct pipe_clip_state *clip) { - DBG("TODO: "); + struct fd_context *ctx = fd_context(pctx); + ctx->ucp = *clip; + ctx->dirty |= FD_DIRTY_UCP; } static void diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c index bed7b7b826a..d57eb2ba713 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c @@ -172,7 +172,7 @@ flatten_block(nir_builder *bld, nir_block *if_block, nir_block *prev_block, (intr->intrinsic == nir_intrinsic_discard_if)) { nir_ssa_def *discard_cond; - nir_builder_insert_after_instr(bld, + bld->cursor = nir_after_instr( nir_block_last_instr(prev_block)); if (invert) { diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.c b/src/gallium/drivers/nouveau/nv30/nv30_context.c index 46590eecdf3..a36fd57fae7 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_context.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_context.c @@ -190,7 +190,7 @@ nv30_context_destroy(struct pipe_context *pipe) } while(0) struct pipe_context * -nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) { struct nv30_screen *screen = nv30_screen(pscreen); struct nv30_context *nv30 = CALLOC_STRUCT(nv30_context); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index 11638dd7f14..4949459a803 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -240,7 +240,7 @@ nv50_context_get_sample_position(struct pipe_context *, unsigned, unsigned, float *); struct pipe_context * -nv50_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +nv50_create(struct pipe_screen 
*pscreen, void *priv, unsigned ctxflags) { struct nv50_screen *screen = nv50_screen(pscreen); struct nv50_context *nv50; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 613cad69aa5..f7604f11788 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -262,7 +262,7 @@ nvc0_context_get_sample_position(struct pipe_context *, unsigned, unsigned, float *); struct pipe_context * -nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) { struct nvc0_screen *screen = nvc0_screen(pscreen); struct nvc0_context *nvc0; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index f7b85a8e931..b13df6a9485 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -56,10 +56,10 @@ struct nvc0_query { #define NVC0_QUERY_ALLOC_SPACE 256 -static boolean nvc0_mp_pm_query_begin(struct nvc0_context *, +static boolean nvc0_hw_sm_query_begin(struct nvc0_context *, struct nvc0_query *); -static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *); -static boolean nvc0_mp_pm_query_result(struct nvc0_context *, +static void nvc0_hw_sm_query_end(struct nvc0_context *, struct nvc0_query *); +static boolean nvc0_hw_sm_query_result(struct nvc0_context *, struct nvc0_query *, void *, boolean); static inline struct nvc0_query * @@ -159,7 +159,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index) } else #endif if (nvc0->screen->base.device->drm_version >= 0x01000101) { - if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) { + if (type >= NVE4_HW_SM_QUERY(0) && type <= NVE4_HW_SM_QUERY_LAST) { /* for each MP: * [00] = WS0.C0 * [04] = WS0.C1 @@ -189,7 +189,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned 
index) space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t); break; } else - if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) { + if (type >= NVC0_HW_SM_QUERY(0) && type <= NVC0_HW_SM_QUERY_LAST) { /* for each MP: * [00] = MP.C0 * [04] = MP.C1 @@ -327,9 +327,9 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) q->u.value = 0; } else #endif - if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || - (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { - ret = nvc0_mp_pm_query_begin(nvc0, q); + if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) || + (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) { + ret = nvc0_hw_sm_query_begin(nvc0, q); } break; } @@ -412,9 +412,9 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) return; } else #endif - if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || - (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { - nvc0_mp_pm_query_end(nvc0, q); + if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) || + (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) { + nvc0_hw_sm_query_end(nvc0, q); } break; } @@ -453,9 +453,9 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, return true; } else #endif - if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || - (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { - return nvc0_mp_pm_query_result(nvc0, q, result, wait); + if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) || + (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) { + return nvc0_hw_sm_query_result(nvc0, q, result, wait); } if (q->state != NVC0_QUERY_STATE_READY) @@ -692,7 +692,7 @@ static const char *nvc0_drv_stat_names[] = * We could add a kernel interface for it, but reading the counters like this * has the advantage of being async (if 
get_result isn't called immediately). */ -static const uint64_t nve4_read_mp_pm_counters_code[] = +static const uint64_t nve4_read_hw_sm_counters_code[] = { /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20 * mov b32 $r8 $tidx @@ -776,6 +776,33 @@ static const uint64_t nve4_read_mp_pm_counters_code[] = static const char *nve4_pm_query_names[] = { /* MP counters */ + "active_cycles", + "active_warps", + "atom_count", + "branch", + "divergent_branch", + "gld_request", + "global_ld_mem_divergence_replays", + "global_store_transaction", + "global_st_mem_divergence_replays", + "gred_count", + "gst_request", + "inst_executed", + "inst_issued", + "inst_issued1", + "inst_issued2", + "l1_global_load_hit", + "l1_global_load_miss", + "l1_local_load_hit", + "l1_local_load_miss", + "l1_local_store_hit", + "l1_local_store_miss", + "l1_shared_load_transactions", + "l1_shared_store_transactions", + "local_load", + "local_load_transactions", + "local_store", + "local_store_transactions", "prof_trigger_00", "prof_trigger_01", "prof_trigger_02", @@ -784,41 +811,14 @@ static const char *nve4_pm_query_names[] = "prof_trigger_05", "prof_trigger_06", "prof_trigger_07", - "warps_launched", - "threads_launched", - "sm_cta_launched", - "inst_issued1", - "inst_issued2", - "inst_executed", - "local_load", - "local_store", "shared_load", - "shared_store", - "l1_local_load_hit", - "l1_local_load_miss", - "l1_local_store_hit", - "l1_local_store_miss", - "gld_request", - "gst_request", - "l1_global_load_hit", - "l1_global_load_miss", - "uncached_global_load_transaction", - "global_store_transaction", - "branch", - "divergent_branch", - "active_warps", - "active_cycles", - "inst_issued", - "atom_count", - "gred_count", "shared_load_replay", + "shared_store", "shared_store_replay", - "local_load_transactions", - "local_store_transactions", - "l1_shared_load_transactions", - "l1_shared_store_transactions", - "global_ld_mem_divergence_replays", - "global_st_mem_divergence_replays", + "sm_cta_launched", + 
"threads_launched", + "uncached_global_load_transaction", + "warps_launched", /* metrics, i.e. functions of the MP counters */ "metric-ipc", /* inst_executed, clock */ "metric-ipac", /* inst_executed, active_cycles */ @@ -852,7 +852,7 @@ struct nvc0_mp_counter_cfg #define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */ #define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */ -struct nvc0_mp_pm_query_cfg +struct nvc0_hw_sm_query_cfg { struct nvc0_mp_counter_cfg ctr[4]; uint8_t num_counters; @@ -860,17 +860,17 @@ struct nvc0_mp_pm_query_cfg uint8_t norm[2]; /* normalization num,denom */ }; -#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } } -#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } } -#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ +#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } } +#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } } +#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } -#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ +#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) 
[NVE4_HW_SM_QUERY_METRIC_##n] = { { \ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } -#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ +#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } @@ -881,8 +881,35 @@ struct nvc0_mp_pm_query_cfg * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers; * this is inaccurate ! */ -static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] = +static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] = { + _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1), + _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1), + _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1), + _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1), + _Q1A(DIVERGENT_BRANCH, 0x0001, B6, BRANCH, 0x00000010, 1, 1), + _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1), + _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1), + _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1), + _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1), + _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1), + _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1), + _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1), + _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1), + _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1), + _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1), + _Q1B(L1_GLD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1), + _Q1B(L1_GLD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1), + _Q1B(L1_LOCAL_LD_HIT, 
0x0001, B6, L1, 0x00000000, 1, 1), + _Q1B(L1_LOCAL_LD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1), + _Q1B(L1_LOCAL_ST_HIT, 0x0001, B6, L1, 0x00000008, 1, 1), + _Q1B(L1_LOCAL_ST_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1), + _Q1B(L1_SHARED_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1), + _Q1B(L1_SHARED_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1), + _Q1A(LOCAL_LD, 0x0001, B6, LDST, 0x00000008, 1, 1), + _Q1B(LOCAL_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1), + _Q1A(LOCAL_ST, 0x0001, B6, LDST, 0x0000000c, 1, 1), + _Q1B(LOCAL_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1), _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1), _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1), _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1), @@ -891,41 +918,14 @@ static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] = _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1), _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1), _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1), - _Q1A(LAUNCHED_WARPS, 0x0001, B6, LAUNCH, 0x00000004, 1, 1), - _Q1A(LAUNCHED_THREADS, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1), - _Q1B(LAUNCHED_CTA, 0x0001, B6, WARP, 0x0000001c, 1, 1), - _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1), - _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1), - _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1), - _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1), - _Q1A(LD_SHARED, 0x0001, B6, LDST, 0x00000000, 1, 1), - _Q1A(ST_SHARED, 0x0001, B6, LDST, 0x00000004, 1, 1), - _Q1A(LD_LOCAL, 0x0001, B6, LDST, 0x00000008, 1, 1), - _Q1A(ST_LOCAL, 0x0001, B6, LDST, 0x0000000c, 1, 1), - _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1), - _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1), - _Q1B(L1_LOCAL_LOAD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1), - _Q1B(L1_LOCAL_LOAD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1), - _Q1B(L1_LOCAL_STORE_HIT, 0x0001, B6, L1, 0x00000008, 1, 1), - 
_Q1B(L1_LOCAL_STORE_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1), - _Q1B(L1_GLOBAL_LOAD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1), - _Q1B(L1_GLOBAL_LOAD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1), - _Q1B(GLD_TRANSACTIONS_UNCACHED, 0x0001, B6, MEM, 0x00000000, 1, 1), - _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1), - _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1), - _Q1A(BRANCH_DIVERGENT, 0x0001, B6, BRANCH, 0x00000010, 1, 1), - _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1), - _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1), - _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1), - _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1), - _Q1B(LD_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1), - _Q1B(ST_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1), - _Q1B(LD_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1), - _Q1B(ST_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1), - _Q1B(L1_LD_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1), - _Q1B(L1_ST_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1), - _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1), - _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1), + _Q1A(SHARED_LD, 0x0001, B6, LDST, 0x00000000, 1, 1), + _Q1B(SHARED_LD_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1), + _Q1A(SHARED_ST, 0x0001, B6, LDST, 0x00000004, 1, 1), + _Q1B(SHARED_ST_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1), + _Q1B(SM_CTA_LAUNCHED, 0x0001, B6, WARP, 0x0000001c, 1, 1), + _Q1A(THREADS_LAUNCHED, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1), + _Q1B(UNCACHED_GLD_TRANSACTIONS, 0x0001, B6, MEM, 0x00000000, 1, 1), + _Q1A(WARPS_LAUNCHED, 0x0001, B6, LAUNCH, 0x00000004, 1, 1), _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1), _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1), _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1), @@ -940,7 +940,7 @@ static const struct 
nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] = #undef _M2B /* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */ -static const uint64_t nvc0_read_mp_pm_counters_code[] = +static const uint64_t nvc0_read_hw_sm_counters_code[] = { /* mov b32 $r8 $tidx * mov b32 $r9 $physid @@ -993,29 +993,21 @@ static const uint64_t nvc0_read_mp_pm_counters_code[] = static const char *nvc0_pm_query_names[] = { /* MP counters */ - "inst_executed", + "active_cycles", + "active_warps", + "atom_count", "branch", "divergent_branch", - "active_warps", - "active_cycles", - "warps_launched", - "threads_launched", - "shared_load", - "shared_store", - "local_load", - "local_store", - "gred_count", - "atom_count", "gld_request", + "gred_count", "gst_request", + "inst_executed", "inst_issued1_0", "inst_issued1_1", "inst_issued2_0", "inst_issued2_1", - "thread_inst_executed_0", - "thread_inst_executed_1", - "thread_inst_executed_2", - "thread_inst_executed_3", + "local_load", + "local_store", "prof_trigger_00", "prof_trigger_01", "prof_trigger_02", @@ -1024,35 +1016,35 @@ static const char *nvc0_pm_query_names[] = "prof_trigger_05", "prof_trigger_06", "prof_trigger_07", + "shared_load", + "shared_store", + "threads_launched", + "thread_inst_executed_0", + "thread_inst_executed_1", + "thread_inst_executed_2", + "thread_inst_executed_3", + "warps_launched", }; -#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } } +#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } } -static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] = +static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] = { - _Q(INST_EXECUTED, 
0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00), - _Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00), - _Q(BRANCH_DIVERGENT, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00), - _Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), _Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(LAUNCHED_WARPS, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(LAUNCHED_THREADS, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), - _Q(LD_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(ST_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(LD_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(ST_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), _Q(ATOM_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00), + _Q(DIVERGENT_BRANCH, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00), _Q(GLD_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), _Q(GST_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00), _Q(INST_ISSUED1_0, 0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), _Q(INST_ISSUED1_1, 0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), _Q(INST_ISSUED2_0, 0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), _Q(INST_ISSUED2_1, 0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), - _Q(TH_INST_EXECUTED_1, 0xaaaa, 
LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), - _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), - _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(LOCAL_LD, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(LOCAL_ST, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), _Q(PROF_TRIGGER_0, 0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), _Q(PROF_TRIGGER_1, 0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), _Q(PROF_TRIGGER_2, 0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), @@ -1061,38 +1053,46 @@ static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] = _Q(PROF_TRIGGER_5, 0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), _Q(PROF_TRIGGER_6, 0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00), _Q(PROF_TRIGGER_7, 0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(SHARED_LD, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(SHARED_ST, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(THREADS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), + _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(WARPS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), }; #undef _Q -static const struct nvc0_mp_pm_query_cfg * -nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q) +static const struct nvc0_hw_sm_query_cfg * +nvc0_hw_sm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q) { struct nvc0_screen *screen = nvc0->screen; if (screen->base.class_3d >= NVE4_3D_CLASS) - return &nve4_mp_pm_queries[q->type - 
PIPE_QUERY_DRIVER_SPECIFIC]; - return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)]; + return &nve4_hw_sm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC]; + return &nvc0_hw_sm_queries[q->type - NVC0_HW_SM_QUERY(0)]; } boolean -nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q) +nvc0_hw_sm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q) { struct nvc0_screen *screen = nvc0->screen; struct nouveau_pushbuf *push = nvc0->base.pushbuf; const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS; - const struct nvc0_mp_pm_query_cfg *cfg; + const struct nvc0_hw_sm_query_cfg *cfg; unsigned i, c; unsigned num_ab[2] = { 0, 0 }; - cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + cfg = nvc0_hw_sm_query_get_cfg(nvc0, q); /* check if we have enough free counter slots */ for (i = 0; i < cfg->num_counters; ++i) num_ab[cfg->ctr[i].sig_dom]++; - if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 || - screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) { + if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 || + screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) { NOUVEAU_ERR("Not enough free MP counter slots !\n"); return false; } @@ -1113,14 +1113,14 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q) for (i = 0; i < cfg->num_counters; ++i) { const unsigned d = cfg->ctr[i].sig_dom; - if (!screen->pm.num_mp_pm_active[d]) { + if (!screen->pm.num_hw_sm_active[d]) { uint32_t m = (1 << 22) | (1 << (7 + (8 * !d))); - if (screen->pm.num_mp_pm_active[!d]) + if (screen->pm.num_hw_sm_active[!d]) m |= 1 << (7 + (8 * d)); BEGIN_NVC0(push, SUBC_SW(0x0600), 1); PUSH_DATA (push, m); } - screen->pm.num_mp_pm_active[d]++; + screen->pm.num_hw_sm_active[d]++; for (c = d * 4; c < (d * 4 + 4); ++c) { if (!screen->pm.mp_counter[c]) { @@ -1163,7 +1163,7 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q) } static void -nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) +nvc0_hw_sm_query_end(struct nvc0_context *nvc0, 
struct nvc0_query *q) { struct nvc0_screen *screen = nvc0->screen; struct pipe_context *pipe = &nvc0->base.pipe; @@ -1174,9 +1174,9 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 }; const uint grid[3] = { screen->mp_count, 1, 1 }; unsigned c; - const struct nvc0_mp_pm_query_cfg *cfg; + const struct nvc0_hw_sm_query_cfg *cfg; - cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + cfg = nvc0_hw_sm_query_get_cfg(nvc0, q); if (unlikely(!screen->pm.prog)) { struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program); @@ -1185,11 +1185,11 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) prog->num_gprs = 14; prog->parm_size = 12; if (is_nve4) { - prog->code = (uint32_t *)nve4_read_mp_pm_counters_code; - prog->code_size = sizeof(nve4_read_mp_pm_counters_code); + prog->code = (uint32_t *)nve4_read_hw_sm_counters_code; + prog->code_size = sizeof(nve4_read_hw_sm_counters_code); } else { - prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code; - prog->code_size = sizeof(nvc0_read_mp_pm_counters_code); + prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code; + prog->code_size = sizeof(nvc0_read_hw_sm_counters_code); } screen->pm.prog = prog; } @@ -1207,7 +1207,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) /* release counters for this query */ for (c = 0; c < 8; ++c) { if (nvc0_query(screen->pm.mp_counter[c]) == q) { - screen->pm.num_mp_pm_active[c / 4]--; + screen->pm.num_hw_sm_active[c / 4]--; screen->pm.mp_counter[c] = NULL; } } @@ -1234,7 +1234,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) q = nvc0_query(screen->pm.mp_counter[c]); if (!q) continue; - cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + cfg = nvc0_hw_sm_query_get_cfg(nvc0, q); for (i = 0; i < cfg->num_counters; ++i) { if (mask & (1 << q->ctr[i])) break; @@ -1250,10 +1250,10 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) } static inline bool 
-nvc0_mp_pm_query_read_data(uint32_t count[32][4], +nvc0_hw_sm_query_read_data(uint32_t count[32][4], struct nvc0_context *nvc0, bool wait, struct nvc0_query *q, - const struct nvc0_mp_pm_query_cfg *cfg, + const struct nvc0_hw_sm_query_cfg *cfg, unsigned mp_count) { unsigned p, c; @@ -1275,10 +1275,10 @@ nvc0_mp_pm_query_read_data(uint32_t count[32][4], } static inline bool -nve4_mp_pm_query_read_data(uint32_t count[32][4], +nve4_hw_sm_query_read_data(uint32_t count[32][4], struct nvc0_context *nvc0, bool wait, struct nvc0_query *q, - const struct nvc0_mp_pm_query_cfg *cfg, + const struct nvc0_hw_sm_query_cfg *cfg, unsigned mp_count) { unsigned p, c, d; @@ -1317,22 +1317,22 @@ nve4_mp_pm_query_read_data(uint32_t count[32][4], * NOTE: Interpretation of IPC requires knowledge of MP count. */ static boolean -nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q, +nvc0_hw_sm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q, void *result, boolean wait) { uint32_t count[32][4]; uint64_t value = 0; unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32); unsigned p, c; - const struct nvc0_mp_pm_query_cfg *cfg; + const struct nvc0_hw_sm_query_cfg *cfg; bool ret; - cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + cfg = nvc0_hw_sm_query_get_cfg(nvc0, q); if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) - ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count); + ret = nve4_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count); else - ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count); + ret = nvc0_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count); if (!ret) return false; @@ -1410,11 +1410,11 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, if (screen->base.device->drm_version >= 0x01000101) { if (screen->compute) { if (screen->base.class_3d == NVE4_3D_CLASS) { - count += NVE4_PM_QUERY_COUNT; + count += NVE4_HW_SM_QUERY_COUNT; } else if (screen->base.class_3d < NVE4_3D_CLASS) { /* 
NVC0_COMPUTE is not always enabled */ - count += NVC0_PM_QUERY_COUNT; + count += NVC0_HW_SM_QUERY_COUNT; } } } @@ -1444,15 +1444,15 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, if (screen->compute) { if (screen->base.class_3d == NVE4_3D_CLASS) { info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; - info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); + info->query_type = NVE4_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); info->max_value.u64 = - (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100; + (id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100; info->group_id = NVC0_QUERY_MP_COUNTER_GROUP; return 1; } else if (screen->base.class_3d < NVE4_3D_CLASS) { info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; - info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); + info->query_type = NVC0_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); info->group_id = NVC0_QUERY_MP_COUNTER_GROUP; return 1; } @@ -1494,7 +1494,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; if (screen->base.class_3d == NVE4_3D_CLASS) { - info->num_queries = NVE4_PM_QUERY_COUNT; + info->num_queries = NVE4_HW_SM_QUERY_COUNT; /* On NVE4+, each multiprocessor have 8 hardware counters separated * in two distinct domains, but we allow only one active query @@ -1504,7 +1504,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, return 1; } else if (screen->base.class_3d < NVE4_3D_CLASS) { - info->num_queries = NVC0_PM_QUERY_COUNT; + info->num_queries = NVC0_HW_SM_QUERY_COUNT; /* On NVC0:NVE4, each multiprocessor have 8 hardware counters * in a single domain. 
*/ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index d8826ae0c0d..f57a316f01e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -95,7 +95,7 @@ struct nvc0_screen { struct { struct nvc0_program *prog; /* compute state object to read MP counters */ struct pipe_query *mp_counter[8]; /* counter to query allocation */ - uint8_t num_mp_pm_active[2]; + uint8_t num_hw_sm_active[2]; bool mp_counters_enabled; } pm; @@ -120,156 +120,139 @@ nvc0_screen(struct pipe_screen *screen) /* Performance counter queries: */ -#define NVE4_PM_QUERY_COUNT 49 -#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i)) -#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1) -#define NVE4_PM_QUERY_PROF_TRIGGER_0 0 -#define NVE4_PM_QUERY_PROF_TRIGGER_1 1 -#define NVE4_PM_QUERY_PROF_TRIGGER_2 2 -#define NVE4_PM_QUERY_PROF_TRIGGER_3 3 -#define NVE4_PM_QUERY_PROF_TRIGGER_4 4 -#define NVE4_PM_QUERY_PROF_TRIGGER_5 5 -#define NVE4_PM_QUERY_PROF_TRIGGER_6 6 -#define NVE4_PM_QUERY_PROF_TRIGGER_7 7 -#define NVE4_PM_QUERY_LAUNCHED_WARPS 8 -#define NVE4_PM_QUERY_LAUNCHED_THREADS 9 -#define NVE4_PM_QUERY_LAUNCHED_CTA 10 -#define NVE4_PM_QUERY_INST_ISSUED1 11 -#define NVE4_PM_QUERY_INST_ISSUED2 12 -#define NVE4_PM_QUERY_INST_EXECUTED 13 -#define NVE4_PM_QUERY_LD_LOCAL 14 -#define NVE4_PM_QUERY_ST_LOCAL 15 -#define NVE4_PM_QUERY_LD_SHARED 16 -#define NVE4_PM_QUERY_ST_SHARED 17 -#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT 18 -#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS 19 -#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT 20 -#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS 21 -#define NVE4_PM_QUERY_GLD_REQUEST 22 -#define NVE4_PM_QUERY_GST_REQUEST 23 -#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT 24 -#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS 25 -#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26 -#define NVE4_PM_QUERY_GST_TRANSACTIONS 27 -#define NVE4_PM_QUERY_BRANCH 28 -#define 
NVE4_PM_QUERY_BRANCH_DIVERGENT 29 -#define NVE4_PM_QUERY_ACTIVE_WARPS 30 -#define NVE4_PM_QUERY_ACTIVE_CYCLES 31 -#define NVE4_PM_QUERY_INST_ISSUED 32 -#define NVE4_PM_QUERY_ATOM_COUNT 33 -#define NVE4_PM_QUERY_GRED_COUNT 34 -#define NVE4_PM_QUERY_LD_SHARED_REPLAY 35 -#define NVE4_PM_QUERY_ST_SHARED_REPLAY 36 -#define NVE4_PM_QUERY_LD_LOCAL_TRANSACTIONS 37 -#define NVE4_PM_QUERY_ST_LOCAL_TRANSACTIONS 38 -#define NVE4_PM_QUERY_L1_LD_SHARED_TRANSACTIONS 39 -#define NVE4_PM_QUERY_L1_ST_SHARED_TRANSACTIONS 40 -#define NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY 41 -#define NVE4_PM_QUERY_GST_MEM_DIV_REPLAY 42 -#define NVE4_PM_QUERY_METRIC_IPC 43 -#define NVE4_PM_QUERY_METRIC_IPAC 44 -#define NVE4_PM_QUERY_METRIC_IPEC 45 -#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY 46 -#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY 47 -#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD 48 +#define NVE4_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i)) +#define NVE4_HW_SM_QUERY_LAST NVE4_HW_SM_QUERY(NVE4_HW_SM_QUERY_COUNT - 1) +enum nve4_pm_queries +{ + NVE4_HW_SM_QUERY_ACTIVE_CYCLES = 0, + NVE4_HW_SM_QUERY_ACTIVE_WARPS, + NVE4_HW_SM_QUERY_ATOM_COUNT, + NVE4_HW_SM_QUERY_BRANCH, + NVE4_HW_SM_QUERY_DIVERGENT_BRANCH, + NVE4_HW_SM_QUERY_GLD_REQUEST, + NVE4_HW_SM_QUERY_GLD_MEM_DIV_REPLAY, + NVE4_HW_SM_QUERY_GST_TRANSACTIONS, + NVE4_HW_SM_QUERY_GST_MEM_DIV_REPLAY, + NVE4_HW_SM_QUERY_GRED_COUNT, + NVE4_HW_SM_QUERY_GST_REQUEST, + NVE4_HW_SM_QUERY_INST_EXECUTED, + NVE4_HW_SM_QUERY_INST_ISSUED, + NVE4_HW_SM_QUERY_INST_ISSUED1, + NVE4_HW_SM_QUERY_INST_ISSUED2, + NVE4_HW_SM_QUERY_L1_GLD_HIT, + NVE4_HW_SM_QUERY_L1_GLD_MISS, + NVE4_HW_SM_QUERY_L1_LOCAL_LD_HIT, + NVE4_HW_SM_QUERY_L1_LOCAL_LD_MISS, + NVE4_HW_SM_QUERY_L1_LOCAL_ST_HIT, + NVE4_HW_SM_QUERY_L1_LOCAL_ST_MISS, + NVE4_HW_SM_QUERY_L1_SHARED_LD_TRANSACTIONS, + NVE4_HW_SM_QUERY_L1_SHARED_ST_TRANSACTIONS, + NVE4_HW_SM_QUERY_LOCAL_LD, + NVE4_HW_SM_QUERY_LOCAL_LD_TRANSACTIONS, + NVE4_HW_SM_QUERY_LOCAL_ST, + NVE4_HW_SM_QUERY_LOCAL_ST_TRANSACTIONS, + 
NVE4_HW_SM_QUERY_PROF_TRIGGER_0, + NVE4_HW_SM_QUERY_PROF_TRIGGER_1, + NVE4_HW_SM_QUERY_PROF_TRIGGER_2, + NVE4_HW_SM_QUERY_PROF_TRIGGER_3, + NVE4_HW_SM_QUERY_PROF_TRIGGER_4, + NVE4_HW_SM_QUERY_PROF_TRIGGER_5, + NVE4_HW_SM_QUERY_PROF_TRIGGER_6, + NVE4_HW_SM_QUERY_PROF_TRIGGER_7, + NVE4_HW_SM_QUERY_SHARED_LD, + NVE4_HW_SM_QUERY_SHARED_LD_REPLAY, + NVE4_HW_SM_QUERY_SHARED_ST, + NVE4_HW_SM_QUERY_SHARED_ST_REPLAY, + NVE4_HW_SM_QUERY_SM_CTA_LAUNCHED, + NVE4_HW_SM_QUERY_THREADS_LAUNCHED, + NVE4_HW_SM_QUERY_UNCACHED_GLD_TRANSACTIONS, + NVE4_HW_SM_QUERY_WARPS_LAUNCHED, + NVE4_HW_SM_QUERY_METRIC_IPC, + NVE4_HW_SM_QUERY_METRIC_IPAC, + NVE4_HW_SM_QUERY_METRIC_IPEC, + NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY, + NVE4_HW_SM_QUERY_METRIC_MP_EFFICIENCY, + NVE4_HW_SM_QUERY_METRIC_INST_REPLAY_OHEAD, + NVE4_HW_SM_QUERY_COUNT +}; -/* -#define NVE4_PM_QUERY_GR_IDLE 50 -#define NVE4_PM_QUERY_BSP_IDLE 51 -#define NVE4_PM_QUERY_VP_IDLE 52 -#define NVE4_PM_QUERY_PPP_IDLE 53 -#define NVE4_PM_QUERY_CE0_IDLE 54 -#define NVE4_PM_QUERY_CE1_IDLE 55 -#define NVE4_PM_QUERY_CE2_IDLE 56 -*/ -/* L2 queries (PCOUNTER) */ -/* -#define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57 -... -*/ -/* TEX queries (PCOUNTER) */ -/* -#define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58 -... 
-*/ - -#define NVC0_PM_QUERY_COUNT 31 -#define NVC0_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i)) -#define NVC0_PM_QUERY_LAST NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1) -#define NVC0_PM_QUERY_INST_EXECUTED 0 -#define NVC0_PM_QUERY_BRANCH 1 -#define NVC0_PM_QUERY_BRANCH_DIVERGENT 2 -#define NVC0_PM_QUERY_ACTIVE_WARPS 3 -#define NVC0_PM_QUERY_ACTIVE_CYCLES 4 -#define NVC0_PM_QUERY_LAUNCHED_WARPS 5 -#define NVC0_PM_QUERY_LAUNCHED_THREADS 6 -#define NVC0_PM_QUERY_LD_SHARED 7 -#define NVC0_PM_QUERY_ST_SHARED 8 -#define NVC0_PM_QUERY_LD_LOCAL 9 -#define NVC0_PM_QUERY_ST_LOCAL 10 -#define NVC0_PM_QUERY_GRED_COUNT 11 -#define NVC0_PM_QUERY_ATOM_COUNT 12 -#define NVC0_PM_QUERY_GLD_REQUEST 13 -#define NVC0_PM_QUERY_GST_REQUEST 14 -#define NVC0_PM_QUERY_INST_ISSUED1_0 15 -#define NVC0_PM_QUERY_INST_ISSUED1_1 16 -#define NVC0_PM_QUERY_INST_ISSUED2_0 17 -#define NVC0_PM_QUERY_INST_ISSUED2_1 18 -#define NVC0_PM_QUERY_TH_INST_EXECUTED_0 19 -#define NVC0_PM_QUERY_TH_INST_EXECUTED_1 20 -#define NVC0_PM_QUERY_TH_INST_EXECUTED_2 21 -#define NVC0_PM_QUERY_TH_INST_EXECUTED_3 22 -#define NVC0_PM_QUERY_PROF_TRIGGER_0 23 -#define NVC0_PM_QUERY_PROF_TRIGGER_1 24 -#define NVC0_PM_QUERY_PROF_TRIGGER_2 25 -#define NVC0_PM_QUERY_PROF_TRIGGER_3 26 -#define NVC0_PM_QUERY_PROF_TRIGGER_4 27 -#define NVC0_PM_QUERY_PROF_TRIGGER_5 28 -#define NVC0_PM_QUERY_PROF_TRIGGER_6 29 -#define NVC0_PM_QUERY_PROF_TRIGGER_7 30 +#define NVC0_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i)) +#define NVC0_HW_SM_QUERY_LAST NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_COUNT - 1) +enum nvc0_pm_queries +{ + NVC0_HW_SM_QUERY_ACTIVE_CYCLES = 0, + NVC0_HW_SM_QUERY_ACTIVE_WARPS, + NVC0_HW_SM_QUERY_ATOM_COUNT, + NVC0_HW_SM_QUERY_BRANCH, + NVC0_HW_SM_QUERY_DIVERGENT_BRANCH, + NVC0_HW_SM_QUERY_GLD_REQUEST, + NVC0_HW_SM_QUERY_GRED_COUNT, + NVC0_HW_SM_QUERY_GST_REQUEST, + NVC0_HW_SM_QUERY_INST_EXECUTED, + NVC0_HW_SM_QUERY_INST_ISSUED1_0, + NVC0_HW_SM_QUERY_INST_ISSUED1_1, + NVC0_HW_SM_QUERY_INST_ISSUED2_0, + 
NVC0_HW_SM_QUERY_INST_ISSUED2_1, + NVC0_HW_SM_QUERY_LOCAL_LD, + NVC0_HW_SM_QUERY_LOCAL_ST, + NVC0_HW_SM_QUERY_PROF_TRIGGER_0, + NVC0_HW_SM_QUERY_PROF_TRIGGER_1, + NVC0_HW_SM_QUERY_PROF_TRIGGER_2, + NVC0_HW_SM_QUERY_PROF_TRIGGER_3, + NVC0_HW_SM_QUERY_PROF_TRIGGER_4, + NVC0_HW_SM_QUERY_PROF_TRIGGER_5, + NVC0_HW_SM_QUERY_PROF_TRIGGER_6, + NVC0_HW_SM_QUERY_PROF_TRIGGER_7, + NVC0_HW_SM_QUERY_SHARED_LD, + NVC0_HW_SM_QUERY_SHARED_ST, + NVC0_HW_SM_QUERY_THREADS_LAUNCHED, + NVC0_HW_SM_QUERY_TH_INST_EXECUTED_0, + NVC0_HW_SM_QUERY_TH_INST_EXECUTED_1, + NVC0_HW_SM_QUERY_TH_INST_EXECUTED_2, + NVC0_HW_SM_QUERY_TH_INST_EXECUTED_3, + NVC0_HW_SM_QUERY_WARPS_LAUNCHED, + NVC0_HW_SM_QUERY_COUNT +}; /* Driver statistics queries: */ -#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS - #define NVC0_QUERY_DRV_STAT(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i)) -#define NVC0_QUERY_DRV_STAT_COUNT 29 #define NVC0_QUERY_DRV_STAT_LAST NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1) -#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT 0 -#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES 1 -#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT 2 -#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID 3 -#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS 4 -#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ 5 -#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE 6 -#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT 7 -#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT 8 -#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT 9 -#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ 10 -#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE 11 -#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID 12 -#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT 13 -#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID 14 -#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS 15 -#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES 16 -#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17 -#define 
NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18 -#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT 19 -#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT 20 -#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY 21 -#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED 22 -#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT 23 -#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES 24 -#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT 25 -#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES 26 -#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT 27 -#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT 28 - -#else - -#define NVC0_QUERY_DRV_STAT_COUNT 0 - +enum nvc0_drv_stats_queries +{ +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT = 0, + NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES, + NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT, + NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID, + NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS, + NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ, + NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE, + NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT, + NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT, + NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT, + NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ, + NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE, + NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID, + NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT, + NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID, + NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS, + NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES, + NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT, + NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT, + NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT, + NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT, + NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY, + NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED, + NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT, + NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES, + NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT, + NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES, + NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT, + 
NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT, #endif + NVC0_QUERY_DRV_STAT_COUNT +}; int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned, struct pipe_driver_query_info *); diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index 413aa3d7c59..7c5113e9197 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -120,7 +120,7 @@ int64_t compute_memory_prealloc_chunk( assert(size_in_dw <= pool->size_in_dw); - COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n", + COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n", size_in_dw); LIST_FOR_EACH_ENTRY(item, pool->item_list, link) { @@ -151,7 +151,7 @@ struct list_head *compute_memory_postalloc_chunk( struct compute_memory_item *next; struct list_head *next_link; - COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunck() start_in_dw = %ld\n", + COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunck() start_in_dw = %"PRIi64"\n", start_in_dw); /* Check if we can insert it in the front of the list */ @@ -568,7 +568,7 @@ void compute_memory_free(struct compute_memory_pool* pool, int64_t id) struct pipe_screen *screen = (struct pipe_screen *)pool->screen; struct pipe_resource *res; - COMPUTE_DBG(pool->screen, "* compute_memory_free() id + %ld \n", id); + COMPUTE_DBG(pool->screen, "* compute_memory_free() id + %"PRIi64" \n", id); LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) { @@ -628,7 +628,7 @@ struct compute_memory_item* compute_memory_alloc( { struct compute_memory_item *new_item = NULL; - COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n", + COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n", size_in_dw, 4 * size_in_dw); new_item = (struct compute_memory_item *) diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c index 6a91d4709f4..7c82390ba40 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2143,11 +2143,11 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_ if (state->geom_enable) { uint32_t cut_val; - if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 128) + if (rctx->gs_shader->gs_max_out_vertices <= 128) cut_val = V_028A40_GS_CUT_128; - else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 256) + else if (rctx->gs_shader->gs_max_out_vertices <= 256) cut_val = V_028A40_GS_CUT_256; - else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 512) + else if (rctx->gs_shader->gs_max_out_vertices <= 512) cut_val = V_028A40_GS_CUT_512; else cut_val = V_028A40_GS_CUT_1024; @@ -3013,7 +3013,7 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader struct r600_shader *rshader = &shader->shader; struct r600_shader *cp_shader = &shader->gs_copy_shader->shader; unsigned gsvs_itemsize = - (cp_shader->ring_item_size * rshader->gs_max_out_vertices) >> 2; + (cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2; r600_init_command_buffer(cb, 64); @@ -3022,14 +3022,14 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader r600_store_context_reg(cb, R_028AB8_VGT_VTX_CNT_EN, 1); r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT, - S_028B38_MAX_VERT_OUT(rshader->gs_max_out_vertices)); + S_028B38_MAX_VERT_OUT(shader->selector->gs_max_out_vertices)); r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE, - r600_conv_prim_to_gs_out(rshader->gs_output_prim)); + r600_conv_prim_to_gs_out(shader->selector->gs_output_prim)); if (rctx->screen->b.info.drm_minor >= 35) { r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT, - S_028B90_CNT(MIN2(rshader->gs_num_invocations, 127)) | - S_028B90_ENABLE(rshader->gs_num_invocations > 0)); + 
S_028B90_CNT(MIN2(shader->selector->gs_num_invocations, 127)) | + S_028B90_ENABLE(shader->selector->gs_num_invocations > 0)); } r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4); r600_store_value(cb, cp_shader->ring_item_size >> 2); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 762cc7fac44..b514c58f9d8 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2029,6 +2029,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) fprintf(stderr, "CND:%X ", cf->cond); if (cf->pop_count) fprintf(stderr, "POP:%X ", cf->pop_count); + if (cf->end_of_program) + fprintf(stderr, "EOP "); fprintf(stderr, "\n"); } } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 384ba800a79..ee3e928861b 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -36,6 +36,8 @@ #include "util/list.h" #include "util/u_transfer.h" +#include "tgsi/tgsi_scan.h" + #define R600_NUM_ATOMS 75 #define R600_MAX_VIEWPORTS 16 @@ -305,12 +307,18 @@ struct r600_pipe_shader_selector { struct tgsi_token *tokens; struct pipe_stream_output_info so; + struct tgsi_shader_info info; unsigned num_shaders; /* PIPE_SHADER_[VERTEX|FRAGMENT|...] 
*/ unsigned type; + /* geometry shader properties */ + unsigned gs_output_prim; + unsigned gs_max_out_vertices; + unsigned gs_num_invocations; + unsigned nr_ps_max_color_exports; }; @@ -936,28 +944,5 @@ static inline bool r600_can_read_depth(struct r600_texture *rtex) #define V_028A6C_OUTPRIM_TYPE_LINESTRIP 1 #define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2 -static inline unsigned r600_conv_prim_to_gs_out(unsigned mode) -{ - static const int prim_conv[] = { - V_028A6C_OUTPRIM_TYPE_POINTLIST, - V_028A6C_OUTPRIM_TYPE_LINESTRIP, - V_028A6C_OUTPRIM_TYPE_LINESTRIP, - V_028A6C_OUTPRIM_TYPE_LINESTRIP, - V_028A6C_OUTPRIM_TYPE_TRISTRIP, - V_028A6C_OUTPRIM_TYPE_TRISTRIP, - V_028A6C_OUTPRIM_TYPE_TRISTRIP, - V_028A6C_OUTPRIM_TYPE_TRISTRIP, - V_028A6C_OUTPRIM_TYPE_TRISTRIP, - V_028A6C_OUTPRIM_TYPE_TRISTRIP, - V_028A6C_OUTPRIM_TYPE_LINESTRIP, - V_028A6C_OUTPRIM_TYPE_LINESTRIP, - V_028A6C_OUTPRIM_TYPE_TRISTRIP, - V_028A6C_OUTPRIM_TYPE_TRISTRIP, - V_028A6C_OUTPRIM_TYPE_TRISTRIP - }; - assert(mode < Elements(prim_conv)); - - return prim_conv[mode]; -} - +unsigned r600_conv_prim_to_gs_out(unsigned mode); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 4c4b6005981..b7d7828a9c2 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1809,7 +1809,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, struct tgsi_token *tokens = pipeshader->selector->tokens; struct pipe_stream_output_info so = pipeshader->selector->so; struct tgsi_full_immediate *immediate; - struct tgsi_full_property *property; struct r600_shader_ctx ctx; struct r600_bytecode_output output[32]; unsigned output_done, noutput; @@ -1840,7 +1839,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, shader->indirect_files = ctx.info.indirect_files; indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT); tgsi_parse_init(&ctx.parse, tokens); - ctx.type = ctx.parse.FullHeader.Processor.Processor; + 
ctx.type = ctx.info.processor; shader->processor_type = ctx.type; ctx.bc->type = shader->processor_type; @@ -1968,6 +1967,12 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.nliterals = 0; ctx.literals = NULL; shader->fs_write_all = FALSE; + if (ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) + shader->fs_write_all = TRUE; + + shader->vs_position_window_space = FALSE; + if (ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]) + shader->vs_position_window_space = TRUE; if (shader->vs_as_gs_a) vs_add_primid_output(&ctx, key.vs.prim_id_out); @@ -1994,34 +1999,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, goto out_err; break; case TGSI_TOKEN_TYPE_INSTRUCTION: - break; case TGSI_TOKEN_TYPE_PROPERTY: - property = &ctx.parse.FullToken.FullProperty; - switch (property->Property.PropertyName) { - case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: - if (property->u[0].Data == 1) - shader->fs_write_all = TRUE; - break; - case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION: - if (property->u[0].Data == 1) - shader->vs_position_window_space = TRUE; - break; - case TGSI_PROPERTY_VS_PROHIBIT_UCPS: - /* we don't need this one */ - break; - case TGSI_PROPERTY_GS_INPUT_PRIM: - shader->gs_input_prim = property->u[0].Data; - break; - case TGSI_PROPERTY_GS_OUTPUT_PRIM: - shader->gs_output_prim = property->u[0].Data; - break; - case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: - shader->gs_max_out_vertices = property->u[0].Data; - break; - case TGSI_PROPERTY_GS_INVOCATIONS: - shader->gs_num_invocations = property->u[0].Data; - break; - } break; default: R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 927bac57673..f5ca9d67f1e 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -78,11 +78,6 @@ struct r600_shader { /* Temporarily workaround SB not handling CF_INDEX_[01] index 
registers */ boolean uses_index_registers; - /* geometry shader properties */ - unsigned gs_input_prim; - unsigned gs_output_prim; - unsigned gs_max_out_vertices; - unsigned gs_num_invocations; /* size in bytes of a data item in the ring (single vertex data) */ unsigned ring_item_size; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 5cc2283792d..51527631efd 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1951,11 +1951,11 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom if (state->geom_enable) { uint32_t cut_val; - if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 128) + if (rctx->gs_shader->gs_max_out_vertices <= 128) cut_val = V_028A40_GS_CUT_128; - else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 256) + else if (rctx->gs_shader->gs_max_out_vertices <= 256) cut_val = V_028A40_GS_CUT_256; - else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 512) + else if (rctx->gs_shader->gs_max_out_vertices <= 512) cut_val = V_028A40_GS_CUT_512; else cut_val = V_028A40_GS_CUT_1024; @@ -2650,7 +2650,7 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha struct r600_shader *rshader = &shader->shader; struct r600_shader *cp_shader = &shader->gs_copy_shader->shader; unsigned gsvs_itemsize = - (cp_shader->ring_item_size * rshader->gs_max_out_vertices) >> 2; + (cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2; r600_init_command_buffer(cb, 64); @@ -2659,10 +2659,10 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha if (rctx->b.chip_class >= R700) { r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT, - S_028B38_MAX_VERT_OUT(rshader->gs_max_out_vertices)); + S_028B38_MAX_VERT_OUT(shader->selector->gs_max_out_vertices)); } r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE, - 
r600_conv_prim_to_gs_out(rshader->gs_output_prim)); + r600_conv_prim_to_gs_out(shader->selector->gs_output_prim)); r600_store_context_reg(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE, cp_shader->ring_item_size >> 2); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index a05dd8352c7..a65064945cf 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -34,6 +34,7 @@ #include "util/u_upload_mgr.h" #include "util/u_math.h" #include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw) { @@ -123,6 +124,31 @@ static unsigned r600_conv_pipe_prim(unsigned prim) return prim_conv[prim]; } +unsigned r600_conv_prim_to_gs_out(unsigned mode) +{ + static const int prim_conv[] = { + [PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST, + [PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, + [PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, + [PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, + [PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, + [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, + [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST, + [R600_PRIM_RECTANGLE_LIST] = V_028A6C_OUTPRIM_TYPE_TRISTRIP + }; + assert(mode < Elements(prim_conv)); + + return prim_conv[mode]; +} + /* common state between evergreen and r600 */ static void r600_bind_blend_state_internal(struct 
r600_context *rctx, @@ -818,6 +844,19 @@ static void *r600_create_shader_state(struct pipe_context *ctx, sel->type = pipe_shader_type; sel->tokens = tgsi_dup_tokens(state->tokens); sel->so = state->stream_output; + tgsi_scan_shader(state->tokens, &sel->info); + + switch (pipe_shader_type) { + case PIPE_SHADER_GEOMETRY: + sel->gs_output_prim = + sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; + sel->gs_max_out_vertices = + sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; + sel->gs_num_invocations = + sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS]; + break; + } + return sel; } @@ -1524,7 +1563,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info unsigned prim = info.mode; if (rctx->gs_shader) { - prim = rctx->gs_shader->current->shader.gs_output_prim; + prim = rctx->gs_shader->gs_output_prim; } prim = r600_conv_prim_to_gs_out(prim); /* decrease the number of types to 3 */ diff --git a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp index 5e233f982ea..5fe8f50aa4c 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp @@ -32,6 +32,7 @@ int bc_decoder::decode_cf(unsigned &i, bc_cf& bc) { int r = 0; uint32_t dw0 = dw[i]; uint32_t dw1 = dw[i+1]; + assert(i+1 <= ndw); if ((dw1 >> 29) & 1) { // CF_ALU return decode_cf_alu(i, bc); diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp index 8c2cd1460e5..dadee456a1f 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp @@ -199,6 +199,9 @@ void bc_finalizer::finalize_if(region_node* r) { cf_node *if_jump = sh.create_cf(CF_OP_JUMP); cf_node *if_pop = sh.create_cf(CF_OP_POP); + if (!last_cf || last_cf->get_parent_region() == r) { + last_cf = if_pop; + } if_pop->bc.pop_count = 1; if_pop->jump_after(if_pop); diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index 4879c036f9f..c4799270d9f 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -95,7 +95,7 @@ int bc_parser::decode_shader() { if ((r = decode_cf(i, eop))) return r; - } while (!eop || (i >> 1) <= max_cf); + } while (!eop || (i >> 1) < max_cf); return 0; } @@ -769,6 +769,7 @@ int bc_parser::prepare_ir() { } int bc_parser::prepare_loop(cf_node* c) { + assert(c->bc.addr-1 < cf_map.size()); cf_node *end = cf_map[c->bc.addr - 1]; assert(end->bc.op == CF_OP_LOOP_END); @@ -788,8 +789,12 @@ int bc_parser::prepare_loop(cf_node* c) { } int bc_parser::prepare_if(cf_node* c) { + assert(c->bc.addr-1 < cf_map.size()); cf_node *c_else = NULL, *end = cf_map[c->bc.addr]; + if (!end) + return 0; // not quite sure how this happens, malformed input? + BCP_DUMP( sblog << "parsing JUMP @" << c->bc.id; sblog << "\n"; @@ -815,7 +820,7 @@ int bc_parser::prepare_if(cf_node* c) { if (c_else->parent != c->parent) c_else = NULL; - if (end->parent != c->parent) + if (end && end->parent != c->parent) end = NULL; region_node *reg = sh->create_region(); diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp index 62680788c5e..c98b8fff764 100644 --- a/src/gallium/drivers/r600/sb/sb_sched.cpp +++ b/src/gallium/drivers/r600/sb/sb_sched.cpp @@ -236,7 +236,7 @@ void rp_gpr_tracker::unreserve(alu_node* n) { for (i = 0; i < nsrc; ++i) { value *v = n->src[i]; - if (v->is_readonly()) + if (v->is_readonly() || v->is_undef()) continue; if (i == 1 && opt) continue; diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 7057aa19a7c..deeae0a6a65 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -197,7 +197,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, 
EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); radeon_emit(cs, va); - radeon_emit(cs, (va >> 32UL) & 0xFF); + radeon_emit(cs, (va >> 32) & 0xFFFF); break; case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_GENERATED: @@ -206,13 +206,13 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3)); radeon_emit(cs, va); - radeon_emit(cs, (va >> 32UL) & 0xFF); + radeon_emit(cs, (va >> 32) & 0xFFFF); break; case PIPE_QUERY_TIME_ELAPSED: radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5)); radeon_emit(cs, va); - radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF)); + radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF)); radeon_emit(cs, 0); radeon_emit(cs, 0); break; @@ -220,7 +220,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); radeon_emit(cs, va); - radeon_emit(cs, (va >> 32UL) & 0xFF); + radeon_emit(cs, (va >> 32) & 0xFFFF); break; default: assert(0); @@ -254,7 +254,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); radeon_emit(cs, va); - radeon_emit(cs, (va >> 32UL) & 0xFF); + radeon_emit(cs, (va >> 32) & 0xFFFF); break; case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_GENERATED: @@ -264,7 +264,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3)); radeon_emit(cs, va); - radeon_emit(cs, (va >> 32UL) & 0xFF); + radeon_emit(cs, (va >> 32) & 0xFFFF); break; 
case PIPE_QUERY_TIME_ELAPSED: va += query->buffer.results_end + query->result_size/2; @@ -273,7 +273,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5)); radeon_emit(cs, va); - radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF)); + radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF)); radeon_emit(cs, 0); radeon_emit(cs, 0); break; @@ -282,7 +282,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); radeon_emit(cs, va); - radeon_emit(cs, (va >> 32UL) & 0xFF); + radeon_emit(cs, (va >> 32) & 0xFFFF); break; default: assert(0); @@ -341,8 +341,8 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct while (results_base < qbuf->results_end) { radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0)); - radeon_emit(cs, (va + results_base) & 0xFFFFFFFFUL); - radeon_emit(cs, op | (((va + results_base) >> 32UL) & 0xFF)); + radeon_emit(cs, va + results_base); + radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF)); r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ, RADEON_PRIO_MIN); results_base += query->result_size; diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index d4fe5653687..0cdecd6da79 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -362,7 +362,7 @@ static void si_launch_grid( shader_va += pc; #endif si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); - si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff); + si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8); si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40); si_pm4_set_reg(pm4, 
R_00B848_COMPUTE_PGM_RSRC1, diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 890be071596..b74c893c7d5 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -426,7 +426,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) va = rbuffer->gpu_address + offset; /* Fill in T# buffer resource description */ - desc[0] = va & 0xFFFFFFFF; + desc[0] = va; desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(vb->stride); diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 7a0076e7aa9..1a7eeaecf9e 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -86,8 +86,8 @@ static void si_dma_copy_buffer(struct si_context *ctx, for (i = 0; i < ncopy; i++) { csize = size < max_csize ? size : max_csize; cs->buf[cs->cdw++] = SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd, csize); - cs->buf[cs->cdw++] = dst_offset & 0xffffffff; - cs->buf[cs->cdw++] = src_offset & 0xffffffff; + cs->buf[cs->cdw++] = dst_offset; + cs->buf[cs->cdw++] = src_offset; cs->buf[cs->cdw++] = (dst_offset >> 32UL) & 0xff; cs->buf[cs->cdw++] = (src_offset >> 32UL) & 0xff; dst_offset += csize << shift; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 98b42890f7d..ab5b3ee9ce9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3781,7 +3781,7 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx, uint64_t scratch_va) { unsigned i; - uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff; + uint32_t scratch_rsrc_dword0 = scratch_va; uint32_t scratch_rsrc_dword1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 
c923ea7e154..806ab5f0e22 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -35,10 +35,10 @@ #include "util/u_pstipple.h" static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem, - void (*emit)(struct si_context *ctx, struct r600_atom *state), + void (*emit_func)(struct si_context *ctx, struct r600_atom *state), unsigned num_dw) { - atom->emit = (void*)emit; + atom->emit = (void*)emit_func; atom->num_dw = num_dw; atom->dirty = false; *list_elem = atom; diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index a372a6c0cdc..808cbea8fde 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -409,7 +409,7 @@ vc4_nir_lower_blend_block(nir_block *block, void *state) nir_cf_node_get_function(&block->cf_node); nir_builder b; nir_builder_init(&b, impl); - nir_builder_insert_before_instr(&b, &intr->instr); + b.cursor = nir_before_instr(&intr->instr); vc4_nir_lower_blend_instr(c, &b, intr); } return true; diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index 229d41147d8..b632370cbb2 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -56,7 +56,7 @@ static void vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr) { - nir_builder_insert_before_instr(b, &intr->instr); + b->cursor = nir_before_instr(&intr->instr); if (c->stage == QSTAGE_FRAG && intr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) { @@ -160,7 +160,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, /* All TGSI-to-NIR outputs are VEC4. 
*/ assert(intr->num_components == 4); - nir_builder_insert_before_instr(b, &intr->instr); + b->cursor = nir_before_instr(&intr->instr); for (unsigned i = 0; i < intr->num_components; i++) { nir_intrinsic_instr *intr_comp = @@ -189,7 +189,7 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, return; assert(intr->num_components == 4); - nir_builder_insert_before_instr(b, &intr->instr); + b->cursor = nir_before_instr(&intr->instr); /* Generate scalar loads equivalent to the original VEC4. */ nir_ssa_def *dests[4]; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 3a9ac445b24..600ced924ba 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -101,30 +101,54 @@ static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf) return bo; } +static bool radeon_bo_is_busy(struct radeon_bo *bo) +{ + struct drm_radeon_gem_busy args = {0}; + + args.handle = bo->handle; + return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, + &args, sizeof(args)) != 0; +} + +static void radeon_bo_wait_idle(struct radeon_bo *bo) +{ + struct drm_radeon_gem_wait_idle args = {0}; + + args.handle = bo->handle; + while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, + &args, sizeof(args)) == -EBUSY); +} + static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout, enum radeon_bo_usage usage) { - struct radeon_bo *bo = get_radeon_bo(_buf); + struct radeon_bo *bo = get_radeon_bo(_buf); + int64_t abs_timeout; - /* Wait if any ioctl is being submitted with this buffer. */ - if (!os_wait_until_zero(&bo->num_active_ioctls, timeout)) - return false; + /* No timeout. Just query. 
*/ + if (timeout == 0) + return !bo->num_active_ioctls && !radeon_bo_is_busy(bo); - /* TODO: handle arbitrary timeout */ - if (!timeout) { - struct drm_radeon_gem_busy args = {0}; + abs_timeout = os_time_get_absolute_timeout(timeout); - args.handle = bo->handle; - return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, - &args, sizeof(args)) == 0; - } else { - struct drm_radeon_gem_wait_idle args = {0}; + /* Wait if any ioctl is being submitted with this buffer. */ + if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout)) + return false; - args.handle = bo->handle; - while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, - &args, sizeof(args)) == -EBUSY); + /* Infinite timeout. */ + if (abs_timeout == PIPE_TIMEOUT_INFINITE) { + radeon_bo_wait_idle(bo); return true; } + + /* Other timeouts need to be emulated with a loop. */ + while (radeon_bo_is_busy(bo)) { + if (os_time_get_nano() >= abs_timeout) + return false; + os_time_sleep(10); + } + + return true; } static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index f04a696988a..341af55df8b 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -645,29 +645,8 @@ static bool radeon_fence_wait(struct radeon_winsys *ws, struct pipe_fence_handle *fence, uint64_t timeout) { - struct pb_buffer *rfence = (struct pb_buffer*)fence; - - if (timeout == 0) - return ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE); - - if (timeout != PIPE_TIMEOUT_INFINITE) { - int64_t start_time = os_time_get(); - - /* Convert to microseconds. */ - timeout /= 1000; - - /* Wait in a loop. 
*/ - while (!ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE)) { - if (os_time_get() - start_time >= timeout) { - return FALSE; - } - os_time_sleep(10); - } - return TRUE; - } - - ws->buffer_wait(rfence, PIPE_TIMEOUT_INFINITE, RADEON_USAGE_READWRITE); - return TRUE; + return ws->buffer_wait((struct pb_buffer*)fence, timeout, + RADEON_USAGE_READWRITE); } static void radeon_fence_reference(struct pipe_fence_handle **dst, diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 1bc3de4aec5..5e051996758 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -284,8 +284,9 @@ texture_multisample(const _mesa_glsl_parse_state *state) static bool texture_multisample_array(const _mesa_glsl_parse_state *state) { - return state->is_version(150, 0) || - state->ARB_texture_multisample_enable; + return state->is_version(150, 320) || + state->ARB_texture_multisample_enable || + state->OES_texture_storage_multisample_2d_array_enable; } static bool @@ -665,10 +666,7 @@ private: B1(any); B1(all); B1(not); - B2(textureSize); - ir_function_signature *_textureSize(builtin_available_predicate avail, - const glsl_type *return_type, - const glsl_type *sampler_type); + BA2(textureSize); /** Flags to _texture() */ #define TEX_PROJECT 1 diff --git a/src/glsl/builtin_types.cpp b/src/glsl/builtin_types.cpp index 9cf198fd127..0d0d71d56df 100644 --- a/src/glsl/builtin_types.cpp +++ b/src/glsl/builtin_types.cpp @@ -307,7 +307,8 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state) add_type(symbols, glsl_type::usamplerCubeArray_type); } - if (state->ARB_texture_multisample_enable) { + if (state->ARB_texture_multisample_enable || + state->OES_texture_storage_multisample_2d_array_enable) { add_type(symbols, glsl_type::sampler2DMS_type); add_type(symbols, glsl_type::isampler2DMS_type); add_type(symbols, glsl_type::usampler2DMS_type); diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y index 
18e50afe476..2d631f08c29 100644 --- a/src/glsl/glcpp/glcpp-parse.y +++ b/src/glsl/glcpp/glcpp-parse.y @@ -2382,6 +2382,8 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "GL_OES_EGL_image_external", 1); if (extensions->OES_standard_derivatives) add_builtin_define(parser, "GL_OES_standard_derivatives", 1); + if (extensions->ARB_texture_multisample) + add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1); } } else { add_builtin_define(parser, "GL_ARB_draw_buffers", 1); diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll index 24998c19467..90e84ed1bfe 100644 --- a/src/glsl/glsl_lexer.ll +++ b/src/glsl/glsl_lexer.ll @@ -347,9 +347,9 @@ usampler2DArray KEYWORD(130, 300, 130, 300, USAMPLER2DARRAY); sampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, SAMPLER2DMS); isampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMS); usampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, USAMPLER2DMS); -sampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, SAMPLER2DMSARRAY); -isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMSARRAY); -usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, USAMPLER2DMSARRAY); +sampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, SAMPLER2DMSARRAY); +isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, ISAMPLER2DMSARRAY); +usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, USAMPLER2DMSARRAY); /* keywords 
available with ARB_texture_cube_map_array_enable extension on desktop GLSL */ samplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY); diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index ca772e8ab33..5c8f98b091d 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -628,6 +628,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(OES_EGL_image_external, false, true, OES_EGL_image_external), EXT(OES_standard_derivatives, false, true, OES_standard_derivatives), EXT(OES_texture_3D, false, true, EXT_texture3D), + EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample), /* All other extensions go here, sorted alphabetically. */ diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index e2145bea5fa..295cd10ba14 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -548,6 +548,8 @@ struct _mesa_glsl_parse_state { bool OES_standard_derivatives_warn; bool OES_texture_3D_enable; bool OES_texture_3D_warn; + bool OES_texture_storage_multisample_2d_array_enable; + bool OES_texture_storage_multisample_2d_array_warn; /* All other extensions go here, sorted alphabetically. 
*/ diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 28e2e93a305..f7d000c028b 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -620,7 +620,7 @@ struct glsl_type { const glsl_type *field_type(const char *name) const; /** - * Get the location of a filed within a record type + * Get the location of a field within a record type */ int field_index(const char *name) const; diff --git a/src/glsl/ir_builder.cpp b/src/glsl/ir_builder.cpp index cd03859cac0..c9cf1240dfe 100644 --- a/src/glsl/ir_builder.cpp +++ b/src/glsl/ir_builder.cpp @@ -566,6 +566,12 @@ csel(operand a, operand b, operand c) return expr(ir_triop_csel, a, b, c); } +ir_expression * +bitfield_extract(operand a, operand b, operand c) +{ + return expr(ir_triop_bitfield_extract, a, b, c); +} + ir_expression * bitfield_insert(operand a, operand b, operand c, operand d) { diff --git a/src/glsl/ir_builder.h b/src/glsl/ir_builder.h index f76453ffcf0..b483ebf6269 100644 --- a/src/glsl/ir_builder.h +++ b/src/glsl/ir_builder.h @@ -200,6 +200,7 @@ ir_expression *interpolate_at_sample(operand a, operand b); ir_expression *fma(operand a, operand b, operand c); ir_expression *lrp(operand x, operand y, operand a); ir_expression *csel(operand a, operand b, operand c); +ir_expression *bitfield_extract(operand a, operand b, operand c); ir_expression *bitfield_insert(operand a, operand b, operand c, operand d); ir_swizzle *swizzle(operand a, int swizzle, int components); diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index eef107e5249..265b2234cb6 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -66,7 +66,10 @@ enum lower_packing_builtins_op { LOWER_UNPACK_SNORM_4x8 = 0x0200, LOWER_PACK_UNORM_4x8 = 0x0400, - LOWER_UNPACK_UNORM_4x8 = 0x0800 + LOWER_UNPACK_UNORM_4x8 = 0x0800, + + LOWER_PACK_USE_BFI = 0x1000, + LOWER_PACK_USE_BFE = 0x2000, }; bool do_common_optimization(exec_list *ir, bool linked, diff --git a/src/glsl/link_uniforms.cpp 
b/src/glsl/link_uniforms.cpp index 254086dc050..a0cb6182925 100644 --- a/src/glsl/link_uniforms.cpp +++ b/src/glsl/link_uniforms.cpp @@ -47,10 +47,9 @@ static unsigned values_for_type(const glsl_type *type) { - if (type->is_sampler() || type->is_subroutine()) { + if (type->is_sampler()) { return 1; - } else if (type->is_array() && (type->fields.array->is_sampler() || - type->fields.array->is_subroutine())) { + } else if (type->is_array() && type->fields.array->is_sampler()) { return type->array_size(); } else { return type->component_slots(); diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp index a6fb8a8837e..c8bf68be829 100644 --- a/src/glsl/lower_packing_builtins.cpp +++ b/src/glsl/lower_packing_builtins.cpp @@ -118,6 +118,8 @@ public: *rvalue = split_unpack_half_2x16(op0); break; case LOWER_PACK_UNPACK_NONE: + case LOWER_PACK_USE_BFI: + case LOWER_PACK_USE_BFE: assert(!"not reached"); break; } @@ -222,9 +224,16 @@ private: /* uvec2 u = UVEC2_RVAL; */ ir_variable *u = factory.make_temp(glsl_type::uvec2_type, - "tmp_pack_uvec2_to_uint"); + "tmp_pack_uvec2_to_uint"); factory.emit(assign(u, uvec2_rval)); + if (op_mask & LOWER_PACK_USE_BFI) { + return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), + swizzle_y(u), + constant(16), + constant(16)); + } + /* return (u.y << 16) | (u.x & 0xffff); */ return bit_or(lshift(swizzle_y(u), constant(16u)), bit_and(swizzle_x(u), constant(0xffffu))); @@ -242,9 +251,22 @@ private: { assert(uvec4_rval->type == glsl_type::uvec4_type); - /* uvec4 u = UVEC4_RVAL; */ ir_variable *u = factory.make_temp(glsl_type::uvec4_type, - "tmp_pack_uvec4_to_uint"); + "tmp_pack_uvec4_to_uint"); + + if (op_mask & LOWER_PACK_USE_BFI) { + /* uvec4 u = UVEC4_RVAL; */ + factory.emit(assign(u, uvec4_rval)); + + return bitfield_insert(bitfield_insert( + bitfield_insert( + bit_and(swizzle_x(u), constant(0xffu)), + swizzle_y(u), constant(8), constant(8)), + swizzle_z(u), constant(16), constant(8)), + 
swizzle_w(u), constant(24), constant(8)); + } + + /* uvec4 u = UVEC4_RVAL & 0xff */ factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ @@ -284,6 +306,39 @@ private: return deref(u2).val; } + /** + * \brief Unpack a uint32 into two int16's. + * + * Specifically each 16-bit value is sign-extended to the full width of an + * int32 on return. + */ + ir_rvalue * + unpack_uint_to_ivec2(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), + constant(16u)), + constant(16u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec2_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec2 i2; */ + ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, + "tmp_unpack_uint_to_ivec2_i2"); + + factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), + WRITEMASK_X)); + factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), + WRITEMASK_Y)); + + return deref(i2).val; + } + /** * \brief Unpack a uint32 into four uint8's. 
* @@ -308,13 +363,23 @@ private: /* u4.x = u & 0xffu; */ factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); - /* u4.y = (u >> 8u) & 0xffu; */ - factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), - constant(0xffu)), WRITEMASK_Y)); + if (op_mask & LOWER_PACK_USE_BFE) { + /* u4.y = bitfield_extract(u, 8, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(8), constant(8)), + WRITEMASK_Y)); - /* u4.z = (u >> 16u) & 0xffu; */ - factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), - constant(0xffu)), WRITEMASK_Z)); + /* u4.z = bitfield_extract(u, 16, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(16), constant(8)), + WRITEMASK_Z)); + } else { + /* u4.y = (u >> 8u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), + constant(0xffu)), WRITEMASK_Y)); + + /* u4.z = (u >> 16u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), + constant(0xffu)), WRITEMASK_Z)); + } /* u4.w = (u >> 24u) */ factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); @@ -322,6 +387,43 @@ private: return deref(u4).val; } + /** + * \brief Unpack a uint32 into four int8's. + * + * Specifically each 8-bit value is sign-extended to the full width of an + * int32 on return. 
+ */ + ir_rvalue * + unpack_uint_to_ivec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), + constant(24u)), + constant(24u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec4_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec4 i4; */ + ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, + "tmp_unpack_uint_to_ivec4_i4"); + + factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), + WRITEMASK_X)); + factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), + WRITEMASK_Y)); + factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), + WRITEMASK_Z)); + factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), + WRITEMASK_W)); + + return deref(i4).val; + } + /** * \brief Lower a packSnorm2x16 expression. * @@ -468,9 +570,7 @@ private: assert(uint_rval->type == glsl_type::uint_type); ir_rvalue *result = - clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), - constant(16)), - constant(16u))), + clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), constant(32767.0f)), constant(-1.0f), constant(1.0f)); @@ -527,9 +627,7 @@ private: assert(uint_rval->type == glsl_type::uint_type); ir_rvalue *result = - clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), - constant(24u)), - constant(24u))), + clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), constant(127.0f)), constant(-1.0f), constant(1.0f)); diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 77cc4f078a3..bf001312121 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -664,102 +664,51 @@ add_defs_uses(nir_instr *instr) } void -nir_instr_insert_before(nir_instr *instr, nir_instr *before) +nir_instr_insert(nir_cursor cursor, nir_instr *instr) { - assert(before->type != nir_instr_type_jump); - before->block = instr->block; - add_defs_uses(before); - 
exec_node_insert_node_before(&instr->node, &before->node); -} + switch (cursor.option) { + case nir_cursor_before_block: + /* Only allow inserting jumps into empty blocks. */ + if (instr->type == nir_instr_type_jump) + assert(exec_list_is_empty(&cursor.block->instr_list)); -void -nir_instr_insert_after(nir_instr *instr, nir_instr *after) -{ - if (after->type == nir_instr_type_jump) { - assert(instr == nir_block_last_instr(instr->block)); + instr->block = cursor.block; + add_defs_uses(instr); + exec_list_push_head(&cursor.block->instr_list, &instr->node); + break; + case nir_cursor_after_block: { + /* Inserting instructions after a jump is illegal. */ + nir_instr *last = nir_block_last_instr(cursor.block); + assert(last == NULL || last->type != nir_instr_type_jump); + (void) last; + + instr->block = cursor.block; + add_defs_uses(instr); + exec_list_push_tail(&cursor.block->instr_list, &instr->node); + break; + } + case nir_cursor_before_instr: assert(instr->type != nir_instr_type_jump); + instr->block = cursor.instr->block; + add_defs_uses(instr); + exec_node_insert_node_before(&cursor.instr->node, &instr->node); + break; + case nir_cursor_after_instr: + /* Inserting instructions after a jump is illegal. */ + assert(cursor.instr->type != nir_instr_type_jump); + + /* Only allow inserting jumps at the end of the block. 
*/ + if (instr->type == nir_instr_type_jump) + assert(cursor.instr == nir_block_last_instr(cursor.instr->block)); + + instr->block = cursor.instr->block; + add_defs_uses(instr); + exec_node_insert_after(&cursor.instr->node, &instr->node); + break; } - after->block = instr->block; - add_defs_uses(after); - exec_node_insert_after(&instr->node, &after->node); - - if (after->type == nir_instr_type_jump) - nir_handle_add_jump(after->block); -} - -void -nir_instr_insert_before_block(nir_block *block, nir_instr *before) -{ - if (before->type == nir_instr_type_jump) - assert(exec_list_is_empty(&block->instr_list)); - - before->block = block; - add_defs_uses(before); - exec_list_push_head(&block->instr_list, &before->node); - - if (before->type == nir_instr_type_jump) - nir_handle_add_jump(block); -} - -void -nir_instr_insert_after_block(nir_block *block, nir_instr *after) -{ - if (after->type == nir_instr_type_jump) { - assert(exec_list_is_empty(&block->instr_list) || - nir_block_last_instr(block)->type != nir_instr_type_jump); - } - - after->block = block; - add_defs_uses(after); - exec_list_push_tail(&block->instr_list, &after->node); - - if (after->type == nir_instr_type_jump) - nir_handle_add_jump(block); -} - -void -nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) -{ - if (node->type == nir_cf_node_block) { - nir_instr_insert_before_block(nir_cf_node_as_block(node), before); - } else { - nir_cf_node *prev = nir_cf_node_prev(node); - assert(prev->type == nir_cf_node_block); - nir_block *prev_block = nir_cf_node_as_block(prev); - - nir_instr_insert_before_block(prev_block, before); - } -} - -void -nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) -{ - if (node->type == nir_cf_node_block) { - nir_instr_insert_after_block(nir_cf_node_as_block(node), after); - } else { - nir_cf_node *next = nir_cf_node_next(node); - assert(next->type == nir_cf_node_block); - nir_block *next_block = nir_cf_node_as_block(next); - - 
nir_instr_insert_before_block(next_block, after); - } -} - -void -nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before) -{ - nir_cf_node *first_node = exec_node_data(nir_cf_node, - exec_list_get_head(list), node); - nir_instr_insert_before_cf(first_node, before); -} - -void -nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) -{ - nir_cf_node *last_node = exec_node_data(nir_cf_node, - exec_list_get_tail(list), node); - nir_instr_insert_after_cf(last_node, after); + if (instr->type == nir_instr_type_jump) + nir_handle_add_jump(instr->block); } static bool diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 8a2396422b9..af9f6ebb513 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1567,20 +1567,182 @@ nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref); nir_load_const_instr * nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref); -void nir_instr_insert_before(nir_instr *instr, nir_instr *before); -void nir_instr_insert_after(nir_instr *instr, nir_instr *after); +/** + * NIR Cursors and Instruction Insertion API + * @{ + * + * A tiny struct representing a point to insert/extract instructions or + * control flow nodes. Helps reduce the combinatorial explosion of possible + * points to insert/extract. 
+ * + * \sa nir_control_flow.h + */ +typedef enum { + nir_cursor_before_block, + nir_cursor_after_block, + nir_cursor_before_instr, + nir_cursor_after_instr, +} nir_cursor_option; -void nir_instr_insert_before_block(nir_block *block, nir_instr *before); -void nir_instr_insert_after_block(nir_block *block, nir_instr *after); +typedef struct { + nir_cursor_option option; + union { + nir_block *block; + nir_instr *instr; + }; +} nir_cursor; -void nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before); -void nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after); +static inline nir_block * +nir_cursor_current_block(nir_cursor cursor) +{ + if (cursor.option == nir_cursor_before_instr || + cursor.option == nir_cursor_after_instr) { + return cursor.instr->block; + } else { + return cursor.block; + } +} -void nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before); -void nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after); +static inline nir_cursor +nir_before_block(nir_block *block) +{ + nir_cursor cursor; + cursor.option = nir_cursor_before_block; + cursor.block = block; + return cursor; +} + +static inline nir_cursor +nir_after_block(nir_block *block) +{ + nir_cursor cursor; + cursor.option = nir_cursor_after_block; + cursor.block = block; + return cursor; +} + +static inline nir_cursor +nir_before_instr(nir_instr *instr) +{ + nir_cursor cursor; + cursor.option = nir_cursor_before_instr; + cursor.instr = instr; + return cursor; +} + +static inline nir_cursor +nir_after_instr(nir_instr *instr) +{ + nir_cursor cursor; + cursor.option = nir_cursor_after_instr; + cursor.instr = instr; + return cursor; +} + +static inline nir_cursor +nir_after_block_before_jump(nir_block *block) +{ + nir_instr *last_instr = nir_block_last_instr(block); + if (last_instr && last_instr->type == nir_instr_type_jump) { + return nir_before_instr(last_instr); + } else { + return nir_after_block(block); + } +} + +static inline nir_cursor 
+nir_before_cf_node(nir_cf_node *node) +{ + if (node->type == nir_cf_node_block) + return nir_before_block(nir_cf_node_as_block(node)); + + return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); +} + +static inline nir_cursor +nir_after_cf_node(nir_cf_node *node) +{ + if (node->type == nir_cf_node_block) + return nir_after_block(nir_cf_node_as_block(node)); + + return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); +} + +static inline nir_cursor +nir_before_cf_list(struct exec_list *cf_list) +{ + nir_cf_node *first_node = exec_node_data(nir_cf_node, + exec_list_get_head(cf_list), node); + return nir_before_cf_node(first_node); +} + +static inline nir_cursor +nir_after_cf_list(struct exec_list *cf_list) +{ + nir_cf_node *last_node = exec_node_data(nir_cf_node, + exec_list_get_tail(cf_list), node); + return nir_after_cf_node(last_node); +} + +/** + * Insert a NIR instruction at the given cursor. + * + * Note: This does not update the cursor. + */ +void nir_instr_insert(nir_cursor cursor, nir_instr *instr); + +static inline void +nir_instr_insert_before(nir_instr *instr, nir_instr *before) +{ + nir_instr_insert(nir_before_instr(instr), before); +} + +static inline void +nir_instr_insert_after(nir_instr *instr, nir_instr *after) +{ + nir_instr_insert(nir_after_instr(instr), after); +} + +static inline void +nir_instr_insert_before_block(nir_block *block, nir_instr *before) +{ + nir_instr_insert(nir_before_block(block), before); +} + +static inline void +nir_instr_insert_after_block(nir_block *block, nir_instr *after) +{ + nir_instr_insert(nir_after_block(block), after); +} + +static inline void +nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) +{ + nir_instr_insert(nir_before_cf_node(node), before); +} + +static inline void +nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) +{ + nir_instr_insert(nir_after_cf_node(node), after); +} + +static inline void +nir_instr_insert_before_cf_list(struct exec_list *list, 
nir_instr *before) +{ + nir_instr_insert(nir_before_cf_list(list), before); +} + +static inline void +nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) +{ + nir_instr_insert(nir_after_cf_list(list), after); +} void nir_instr_remove(nir_instr *instr); +/** @} */ + typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state); typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state); typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 7d449262585..3aa0efded3c 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -24,16 +24,12 @@ #ifndef NIR_BUILDER_H #define NIR_BUILDER_H +#include "nir_control_flow.h" + struct exec_list; typedef struct nir_builder { - struct exec_list *cf_node_list; - - nir_block *before_block; - nir_block *after_block; - - nir_instr *before_instr; - nir_instr *after_instr; + nir_cursor cursor; nir_shader *shader; nir_function_impl *impl; @@ -47,75 +43,20 @@ nir_builder_init(nir_builder *build, nir_function_impl *impl) build->shader = impl->overload->function->shader; } -static inline void -nir_builder_insert_after_cf_list(nir_builder *build, - struct exec_list *cf_node_list) -{ - build->cf_node_list = cf_node_list; - build->before_block = NULL; - build->after_block = NULL; - build->before_instr = NULL; - build->after_instr = NULL; -} - -static inline void -nir_builder_insert_before_block(nir_builder *build, - nir_block *block) -{ - build->cf_node_list = NULL; - build->before_block = block; - build->after_block = NULL; - build->before_instr = NULL; - build->after_instr = NULL; -} - -static inline void -nir_builder_insert_after_block(nir_builder *build, - nir_block *block) -{ - build->cf_node_list = NULL; - build->before_block = NULL; - build->after_block = block; - build->before_instr = NULL; - build->after_instr = NULL; -} - -static inline void -nir_builder_insert_before_instr(nir_builder *build, 
nir_instr *before_instr) -{ - build->cf_node_list = NULL; - build->before_block = NULL; - build->after_block = NULL; - build->before_instr = before_instr; - build->after_instr = NULL; -} - -static inline void -nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr) -{ - build->cf_node_list = NULL; - build->before_block = NULL; - build->after_block = NULL; - build->before_instr = NULL; - build->after_instr = after_instr; -} - static inline void nir_builder_instr_insert(nir_builder *build, nir_instr *instr) { - if (build->cf_node_list) { - nir_instr_insert_after_cf_list(build->cf_node_list, instr); - } else if (build->before_block) { - nir_instr_insert_before_block(build->before_block, instr); - } else if (build->after_block) { - nir_instr_insert_after_block(build->after_block, instr); - } else if (build->before_instr) { - nir_instr_insert_before(build->before_instr, instr); - } else { - assert(build->after_instr); - nir_instr_insert_after(build->after_instr, instr); - build->after_instr = instr; - } + nir_instr_insert(build->cursor, instr); + + /* Move the cursor forward. */ + if (build->cursor.option == nir_cursor_after_instr) + build->cursor.instr = instr; +} + +static inline void +nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf) +{ + nir_cf_node_insert(build->cursor, cf); } static inline nir_ssa_def * diff --git a/src/glsl/nir/nir_control_flow.h b/src/glsl/nir/nir_control_flow.h index 5efd41caadf..b71382fc597 100644 --- a/src/glsl/nir/nir_control_flow.h +++ b/src/glsl/nir/nir_control_flow.h @@ -45,95 +45,6 @@ extern "C" { * deleting them. */ -/* Helper struct for representing a point to extract/insert. Helps reduce the - * combinatorial explosion of possible points to extract. 
- */ - -typedef enum { - nir_cursor_before_block, - nir_cursor_after_block, - nir_cursor_before_instr, - nir_cursor_after_instr, -} nir_cursor_option; - -typedef struct { - nir_cursor_option option; - union { - nir_block *block; - nir_instr *instr; - }; -} nir_cursor; - -static inline nir_cursor -nir_before_block(nir_block *block) -{ - nir_cursor cursor; - cursor.option = nir_cursor_before_block; - cursor.block = block; - return cursor; -} - -static inline nir_cursor -nir_after_block(nir_block *block) -{ - nir_cursor cursor; - cursor.option = nir_cursor_after_block; - cursor.block = block; - return cursor; -} - -static inline nir_cursor -nir_before_instr(nir_instr *instr) -{ - nir_cursor cursor; - cursor.option = nir_cursor_before_instr; - cursor.instr = instr; - return cursor; -} - -static inline nir_cursor -nir_after_instr(nir_instr *instr) -{ - nir_cursor cursor; - cursor.option = nir_cursor_after_instr; - cursor.instr = instr; - return cursor; -} - -static inline nir_cursor -nir_before_cf_node(nir_cf_node *node) -{ - if (node->type == nir_cf_node_block) - return nir_before_block(nir_cf_node_as_block(node)); - - return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); -} - -static inline nir_cursor -nir_after_cf_node(nir_cf_node *node) -{ - if (node->type == nir_cf_node_block) - return nir_after_block(nir_cf_node_as_block(node)); - - return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); -} - -static inline nir_cursor -nir_before_cf_list(struct exec_list *cf_list) -{ - nir_cf_node *first_node = exec_node_data(nir_cf_node, - exec_list_get_head(cf_list), node); - return nir_before_cf_node(first_node); -} - -static inline nir_cursor -nir_after_cf_list(struct exec_list *cf_list) -{ - nir_cf_node *last_node = exec_node_data(nir_cf_node, - exec_list_get_tail(cf_list), node); - return nir_after_cf_node(last_node); -} - /** Control flow insertion. 
*/ /** puts a control flow node where the cursor is */ diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c index 7b680320783..0e1653dd274 100644 --- a/src/glsl/nir/nir_lower_idiv.c +++ b/src/glsl/nir/nir_lower_idiv.c @@ -50,7 +50,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu) is_signed = (op == nir_op_idiv); - nir_builder_insert_before_instr(bld, &alu->instr); + bld->cursor = nir_before_instr(&alu->instr); numer = nir_ssa_for_src(bld, alu->src[0].src, nir_ssa_alu_instr_src_components(alu, 0)); diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index c9697e7845e..afb463040cc 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -84,7 +84,7 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect, unsigned base_offset = 0; nir_builder *b = &state->builder; - nir_builder_insert_before_instr(b, instr); + b->cursor = nir_before_instr(instr); nir_deref *tail = &deref->deref; while (tail->child != NULL) { diff --git a/src/glsl/nir/nir_lower_load_const_to_scalar.c b/src/glsl/nir/nir_lower_load_const_to_scalar.c index a90e5245898..b83ef052ea9 100644 --- a/src/glsl/nir/nir_lower_load_const_to_scalar.c +++ b/src/glsl/nir/nir_lower_load_const_to_scalar.c @@ -43,7 +43,7 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower) nir_builder b; nir_builder_init(&b, nir_cf_node_get_function(&lower->instr.block->cf_node)); - nir_builder_insert_before_instr(&b, &lower->instr); + b.cursor = nir_before_instr(&lower->instr); /* Emit the individual loads. 
*/ nir_ssa_def *loads[4]; diff --git a/src/glsl/nir/nir_lower_tex_projector.c b/src/glsl/nir/nir_lower_tex_projector.c index 357131cd728..8a482b182a9 100644 --- a/src/glsl/nir/nir_lower_tex_projector.c +++ b/src/glsl/nir/nir_lower_tex_projector.c @@ -46,7 +46,7 @@ nir_lower_tex_projector_block(nir_block *block, void *void_state) continue; nir_tex_instr *tex = nir_instr_as_tex(instr); - nir_builder_insert_before_instr(b, &tex->instr); + b->cursor = nir_before_instr(&tex->instr); /* Find the projector in the srcs list, if present. */ int proj_index; diff --git a/src/glsl/nir/nir_normalize_cubemap_coords.c b/src/glsl/nir/nir_normalize_cubemap_coords.c index 0da8447aca1..75b647f96cb 100644 --- a/src/glsl/nir/nir_normalize_cubemap_coords.c +++ b/src/glsl/nir/nir_normalize_cubemap_coords.c @@ -52,7 +52,7 @@ normalize_cubemap_coords_block(nir_block *block, void *void_state) if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) continue; - nir_builder_insert_before_instr(b, &tex->instr); + b->cursor = nir_before_instr(&tex->instr); for (unsigned i = 0; i < tex->num_srcs; i++) { if (tex->src[i].src_type != nir_tex_src_coord) diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 612d2fff293..8fa80ba0f85 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2310,7 +2310,7 @@ vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block, } } - nir_builder_insert_before_block(&b->nb, block); + b->nb.cursor = nir_before_block(block); struct vtn_ssa_value *phi = vtn_phi_node_create(b, type); struct set_entry *entry2; @@ -2569,10 +2569,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; assert(block->block == NULL); - struct exec_node *list_tail = exec_list_get_tail(b->nb.cf_node_list); - nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); - assert(tail_node->type == nir_cf_node_block); - block->block = 
nir_cf_node_as_block(tail_node); + block->block = nir_cursor_current_block(b->nb.cursor); break; } @@ -2754,17 +2751,15 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; nir_loop *loop = nir_loop_create(b->shader); - nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); - - struct exec_list *old_list = b->nb.cf_node_list; + nir_cf_node_insert(b->nb.cursor, &loop->cf_node); /* Reset the merge_op to prerevent infinite recursion */ block->merge_op = SpvOpNop; - nir_builder_insert_after_cf_list(&b->nb, &loop->body); + b->nb.cursor = nir_after_cf_list(&loop->body); vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); - nir_builder_insert_after_cf_list(&b->nb, old_list); + b->nb.cursor = nir_after_cf_node(&loop->cf_node); block = new_break_block; continue; } @@ -2776,10 +2771,8 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_foreach_instruction(b, block->label, block->branch, vtn_handle_body_instruction); - nir_cf_node *cur_cf_node = - exec_node_data(nir_cf_node, exec_list_get_tail(b->nb.cf_node_list), - node); - nir_block *cur_block = nir_cf_node_as_block(cur_cf_node); + nir_block *cur_block = nir_cursor_current_block(b->nb.cursor); + assert(cur_block == block->block); _mesa_hash_table_insert(b->block_table, cur_block, block); switch (branch_op) { @@ -2824,7 +2817,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, nir_if *if_stmt = nir_if_create(b->shader); if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); - nir_cf_node_insert_end(b->nb.cf_node_list, &if_stmt->cf_node); + nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); if (then_block == break_block) { nir_jump_instr *jump = nir_jump_instr_create(b->shader, @@ -2859,15 +2852,13 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, struct vtn_block *merge_block = vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; - struct exec_list 
*old_list = b->nb.cf_node_list; - - nir_builder_insert_after_cf_list(&b->nb, &if_stmt->then_list); + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block); - nir_builder_insert_after_cf_list(&b->nb, &if_stmt->else_list); + b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block); - nir_builder_insert_after_cf_list(&b->nb, old_list); + b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); block = merge_block; continue; } @@ -2967,7 +2958,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, _mesa_key_pointer_equal); nir_builder_init(&b->nb, b->impl); - nir_builder_insert_after_cf_list(&b->nb, &b->impl->body); + b->nb.cursor = nir_after_cf_list(&b->impl->body); vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); vtn_foreach_instruction(b, func->start_block->label, func->end, vtn_handle_phi_second_pass); diff --git a/src/mapi/es1api/ABI-check b/src/mapi/es1api/ABI-check index 44654cde863..819568f6d1a 100755 --- a/src/mapi/es1api/ABI-check +++ b/src/mapi/es1api/ABI-check @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Print defined gl.* functions not in GL ES 1.1 or in # (FIXME, none of these should be part of the ABI) diff --git a/src/mapi/es2api/ABI-check b/src/mapi/es2api/ABI-check index abbb55c2232..e0bf3c83143 100755 --- a/src/mapi/es2api/ABI-check +++ b/src/mapi/es2api/ABI-check @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Print defined gl.* functions not in GL ES 3.0 or in # (FIXME, none of these should be part of the ABI) diff --git a/src/mapi/glapi/gen/KHR_texture_compression_astc.xml b/src/mapi/glapi/gen/KHR_texture_compression_astc.xml new file mode 100644 index 00000000000..7b5864d4e13 --- /dev/null +++ b/src/mapi/glapi/gen/KHR_texture_compression_astc.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am index 86a92437f16..9224de2b9aa 100644 --- a/src/mapi/glapi/gen/Makefile.am +++ b/src/mapi/glapi/gen/Makefile.am @@ -190,6 +190,7 @@ API_XML = \ INTEL_performance_query.xml \ KHR_debug.xml \ KHR_context_flush_control.xml \ + KHR_texture_compression_astc.xml \ NV_conditional_render.xml \ NV_primitive_restart.xml \ NV_texture_barrier.xml \ diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 642e3b319bb..cfca5a980bb 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -798,4 +798,23 @@ + + + + + + + + + + + + + + + + + + + diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index 658efa485f6..f0dcdca2aee 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -8168,7 +8168,7 @@ - + diff --git a/src/mapi/glapi/gen/gl_x86-64_asm.py b/src/mapi/glapi/gen/gl_x86-64_asm.py index cf42371f8c3..dcd113e268a 100644 --- a/src/mapi/glapi/gen/gl_x86-64_asm.py +++ b/src/mapi/glapi/gen/gl_x86-64_asm.py @@ -144,12 +144,6 @@ class PrintGenericStubs(gl_XML.gl_print_base): print '' print '#ifdef GLX_USE_TLS' print '' - print '\t.globl _x86_64_get_get_dispatch; HIDDEN(_x86_64_get_get_dispatch)' - print '_x86_64_get_get_dispatch:' - print '\tlea\t_x86_64_get_dispatch(%rip), %rax' - print '\tret' - print '' - print '\t.p2align\t4,,15' print '_x86_64_get_dispatch:' print '\tmovq\t_glapi_tls_Dispatch@GOTTPOFF(%rip), %rax' print '\tmovq\t%fs:(%rax), %rax' diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3bbaf977bc5..f9dcdc735b3 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -278,6 +278,7 @@ #define GEN8_SURFACE_TILING_W (1 << 12) #define GEN8_SURFACE_TILING_X (2 << 12) #define GEN8_SURFACE_TILING_Y (3 << 12) +#define GEN8_SURFACE_SAMPLER_L2_BYPASS_DISABLE (1 << 9) #define BRW_SURFACE_RC_READ_WRITE 
(1 << 8) #define BRW_SURFACE_MIPLAYOUT_SHIFT 10 #define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 @@ -506,6 +507,38 @@ #define BRW_SURFACEFORMAT_R8G8B8_UINT 0x1C8 #define BRW_SURFACEFORMAT_R8G8B8_SINT 0x1C9 #define BRW_SURFACEFORMAT_RAW 0x1FF + +#define GEN9_SURFACE_ASTC_HDR_FORMAT_BIT 0x100 + +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_U8sRGB 0x200 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_U8sRGB 0x208 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_U8sRGB 0x209 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_U8sRGB 0x211 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_U8sRGB 0x212 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_U8sRGB 0x221 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_U8sRGB 0x222 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_U8sRGB 0x224 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_U8sRGB 0x231 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_U8sRGB 0x232 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_U8sRGB 0x234 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_U8sRGB 0x236 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_U8sRGB 0x23E +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_U8sRGB 0x23F +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_FLT16 0x240 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_FLT16 0x248 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_FLT16 0x249 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_FLT16 0x251 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_FLT16 0x252 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_FLT16 0x261 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_FLT16 0x262 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_FLT16 0x264 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_FLT16 0x271 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_FLT16 0x272 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_FLT16 0x274 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_FLT16 0x276 +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_FLT16 0x27E +#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_FLT16 0x27F + #define BRW_SURFACE_FORMAT_SHIFT 18 #define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp index 159f7161e11..76530a476d6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -427,7 +427,9 @@ fs_reg::equals(const fs_reg &r) const negate == r.negate && abs == r.abs && !reladdr && !r.reladdr && - memcmp(&fixed_hw_reg, &r.fixed_hw_reg, sizeof(fixed_hw_reg)) == 0 && + ((file != HW_REG && file != IMM) || + memcmp(&fixed_hw_reg, &r.fixed_hw_reg, + sizeof(fixed_hw_reg)) == 0) && stride == r.stride); } @@ -1789,54 +1791,46 @@ fs_visitor::assign_constant_locations() if (dispatch_width != 8) return; + unsigned int num_pull_constants = 0; + pull_constant_loc = ralloc_array(mem_ctx, int, uniforms); memset(pull_constant_loc, -1, sizeof(pull_constant_loc[0]) * uniforms); - /* Walk through and find array access of uniforms. Put a copy of that - * uniform in the pull constant buffer. + bool is_live[uniforms]; + memset(is_live, 0, sizeof(is_live)); + + /* First, we walk through the instructions and do two things: + * + * 1) Figure out which uniforms are live. + * + * 2) Find all indirect access of uniform arrays and flag them as needing + * to go into the pull constant buffer. * * Note that we don't move constant-indexed accesses to arrays. No * testing has been done of the performance impact of this choice. */ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { for (int i = 0 ; i < inst->sources; i++) { - if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr) - continue; - - int uniform = inst->src[i].reg; - - /* If this array isn't already present in the pull constant buffer, - * add it. 
- */ - if (pull_constant_loc[uniform] == -1) { - const gl_constant_value **values = &stage_prog_data->param[uniform]; - - assert(param_size[uniform]); - - for (int j = 0; j < param_size[uniform]; j++) { - pull_constant_loc[uniform + j] = stage_prog_data->nr_pull_params; - - stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] = - values[j]; - } - } - } - } - - /* Find which UNIFORM registers are still in use. */ - bool is_live[uniforms]; - for (unsigned int i = 0; i < uniforms; i++) { - is_live[i] = false; - } - - foreach_block_and_inst(block, fs_inst, inst, cfg) { - for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file != UNIFORM) continue; - int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; - if (constant_nr >= 0 && constant_nr < (int) uniforms) - is_live[constant_nr] = true; + if (inst->src[i].reladdr) { + int uniform = inst->src[i].reg; + + /* If this array isn't already present in the pull constant buffer, + * add it. + */ + if (pull_constant_loc[uniform] == -1) { + assert(param_size[uniform]); + for (int j = 0; j < param_size[uniform]; j++) + pull_constant_loc[uniform + j] = num_pull_constants++; + } + } else { + /* Mark the one accessed uniform as live */ + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; + if (constant_nr >= 0 && constant_nr < (int) uniforms) + is_live[constant_nr] = true; + } } } @@ -1870,27 +1864,29 @@ fs_visitor::assign_constant_locations() } else { /* Demote to a pull constant. */ push_constant_loc[i] = -1; - - int pull_index = stage_prog_data->nr_pull_params++; - stage_prog_data->pull_param[pull_index] = stage_prog_data->param[i]; - pull_constant_loc[i] = pull_index; + pull_constant_loc[i] = num_pull_constants++; } } stage_prog_data->nr_params = num_push_constants; + stage_prog_data->nr_pull_params = num_pull_constants; /* Up until now, the param[] array has been indexed by reg + reg_offset - * of UNIFORM registers. 
Condense it to only contain the uniforms we - * chose to upload as push constants. + * of UNIFORM registers. Move pull constants into pull_param[] and + * condense param[] to only contain the uniforms we chose to push. + * + * NOTE: Because we are condensing the params[] array, we know that + * push_constant_loc[i] <= i and we can do it in one smooth loop without + * having to make a copy. */ for (unsigned int i = 0; i < uniforms; i++) { - int remapped = push_constant_loc[i]; + const gl_constant_value *value = stage_prog_data->param[i]; - if (remapped == -1) - continue; - - assert(remapped <= (int)i); - stage_prog_data->param[remapped] = stage_prog_data->param[i]; + if (pull_constant_loc[i] != -1) { + stage_prog_data->pull_param[pull_constant_loc[i]] = value; + } else if (push_constant_loc[i] != -1) { + stage_prog_data->param[push_constant_loc[i]] = value; + } } } @@ -4806,11 +4802,11 @@ fs_visitor::optimize() */ bld = fs_builder(this, 64); - split_virtual_grfs(); - assign_constant_locations(); demote_pull_constants(); + split_virtual_grfs(); + #define OPT(pass, args...) 
({ \ pass_num++; \ bool this_progress = pass(args); \ diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 31f39fe0adc..0a89d2e7640 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -225,7 +225,6 @@ public: void emit_gen6_gather_wa(uint8_t wa, fs_reg dst); fs_reg resolve_source_modifiers(const fs_reg &src); void emit_discard_jump(); - bool try_replace_with_sel(); bool opt_peephole_sel(); bool opt_peephole_predicated_break(); bool opt_saturate_propagation(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index 34545eaa0fb..df10a9de293 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -372,6 +372,8 @@ namespace brw { emit_minmax(const dst_reg &dst, const src_reg &src0, const src_reg &src1, brw_conditional_mod mod) const { + assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); + if (shader->devinfo->gen >= 6) { set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), fix_unsigned_negate(src1))); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index cd2b850581e..da8d47f1c5e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -132,7 +132,7 @@ fs_visitor::nir_setup_outputs(nir_shader *shader) switch (stage) { case MESA_SHADER_VERTEX: - for (int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) { + for (unsigned int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) { int output = var->data.location + i; this->outputs[output] = offset(reg, bld, 4 * i); this->output_components[output] = vector_elements; @@ -191,8 +191,8 @@ fs_visitor::nir_setup_uniforms(nir_shader *shader) nir_setup_builtin_uniform(var); else nir_setup_uniform(var); - - param_size[var->data.driver_location] = type_size_scalar(var->type); + if(type_size_scalar(var->type) > 0) + 
param_size[var->data.driver_location] = type_size_scalar(var->type); } } else { /* prog_to_nir only creates a single giant uniform variable so we can @@ -203,7 +203,8 @@ fs_visitor::nir_setup_uniforms(nir_shader *shader) &prog->Parameters->ParameterValues[p][i]; } } - param_size[0] = prog->Parameters->NumParameters * 4; + if(prog->Parameters->NumParameters > 0) + param_size[0] = prog->Parameters->NumParameters * 4; } } @@ -416,8 +417,6 @@ fs_visitor::nir_emit_if(nir_if *if_stmt) nir_emit_cf_list(&if_stmt->else_list); bld.emit(BRW_OPCODE_ENDIF); - - try_replace_with_sel(); } void diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index 72e873857ce..34f8715eeb9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -95,42 +95,51 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst) static bool can_coalesce_vars(brw::fs_live_variables *live_intervals, const cfg_t *cfg, const fs_inst *inst, - int var_to, int var_from) + int dst_var, int src_var) { - if (!live_intervals->vars_interfere(var_from, var_to)) + if (!live_intervals->vars_interfere(src_var, dst_var)) return true; - int start_to = live_intervals->start[var_to]; - int end_to = live_intervals->end[var_to]; - int start_from = live_intervals->start[var_from]; - int end_from = live_intervals->end[var_from]; + int dst_start = live_intervals->start[dst_var]; + int dst_end = live_intervals->end[dst_var]; + int src_start = live_intervals->start[src_var]; + int src_end = live_intervals->end[src_var]; /* Variables interfere and one line range isn't a subset of the other. 
*/ - if ((end_to > end_from && start_from < start_to) || - (end_from > end_to && start_to < start_from)) + if ((dst_end > src_end && src_start < dst_start) || + (src_end > dst_end && dst_start < src_start)) return false; - int start_ip = MIN2(start_to, start_from); - int scan_ip = -1; + /* Check for a write to either register in the intersection of their live + * ranges. + */ + int start_ip = MAX2(dst_start, src_start); + int end_ip = MIN2(dst_end, src_end); - foreach_block_and_inst(block, fs_inst, scan_inst, cfg) { - scan_ip++; - - if (scan_ip < start_ip) + foreach_block(block, cfg) { + if (block->end_ip < start_ip) continue; - if (scan_inst->is_control_flow()) - return false; + int scan_ip = block->start_ip - 1; - if (scan_ip <= live_intervals->start[var_to]) - continue; + foreach_inst_in_block(fs_inst, scan_inst, block) { + scan_ip++; - if (scan_ip > live_intervals->end[var_to]) - return true; + /* Ignore anything before the intersection of the live ranges */ + if (scan_ip < start_ip) + continue; - if (scan_inst->dst.equals(inst->dst) || - scan_inst->dst.equals(inst->src[0])) - return false; + /* Ignore the copying instruction itself */ + if (scan_inst == inst) + continue; + + if (scan_ip > end_ip) + return true; /* registers do not interfere */ + + if (scan_inst->overwrites_reg(inst->dst) || + scan_inst->overwrites_reg(inst->src[0])) + return false; /* registers interfere */ + } } return true; @@ -145,11 +154,11 @@ fs_visitor::register_coalesce() int src_size = 0; int channels_remaining = 0; - int reg_from = -1, reg_to = -1; - int reg_to_offset[MAX_VGRF_SIZE]; + int src_reg = -1, dst_reg = -1; + int dst_reg_offset[MAX_VGRF_SIZE]; fs_inst *mov[MAX_VGRF_SIZE]; - int var_to[MAX_VGRF_SIZE]; - int var_from[MAX_VGRF_SIZE]; + int dst_var[MAX_VGRF_SIZE]; + int src_var[MAX_VGRF_SIZE]; foreach_block_and_inst(block, fs_inst, inst, cfg) { if (!is_coalesce_candidate(this, inst)) @@ -161,8 +170,8 @@ fs_visitor::register_coalesce() continue; } - if (reg_from != 
inst->src[0].reg) { - reg_from = inst->src[0].reg; + if (src_reg != inst->src[0].reg) { + src_reg = inst->src[0].reg; src_size = alloc.sizes[inst->src[0].reg]; assert(src_size <= MAX_VGRF_SIZE); @@ -170,15 +179,15 @@ fs_visitor::register_coalesce() channels_remaining = src_size; memset(mov, 0, sizeof(mov)); - reg_to = inst->dst.reg; + dst_reg = inst->dst.reg; } - if (reg_to != inst->dst.reg) + if (dst_reg != inst->dst.reg) continue; if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { for (int i = 0; i < src_size; i++) { - reg_to_offset[i] = i; + dst_reg_offset[i] = i; } mov[0] = inst; channels_remaining -= inst->regs_written; @@ -194,9 +203,9 @@ fs_visitor::register_coalesce() channels_remaining = -1; continue; } - reg_to_offset[offset] = inst->dst.reg_offset; + dst_reg_offset[offset] = inst->dst.reg_offset; if (inst->regs_written > 1) - reg_to_offset[offset + 1] = inst->dst.reg_offset + 1; + dst_reg_offset[offset + 1] = inst->dst.reg_offset + 1; mov[offset] = inst; channels_remaining -= inst->regs_written; } @@ -206,20 +215,20 @@ fs_visitor::register_coalesce() bool can_coalesce = true; for (int i = 0; i < src_size; i++) { - if (reg_to_offset[i] != reg_to_offset[0] + i) { + if (dst_reg_offset[i] != dst_reg_offset[0] + i) { /* Registers are out-of-order. 
*/ can_coalesce = false; - reg_from = -1; + src_reg = -1; break; } - var_to[i] = live_intervals->var_from_vgrf[reg_to] + reg_to_offset[i]; - var_from[i] = live_intervals->var_from_vgrf[reg_from] + i; + dst_var[i] = live_intervals->var_from_vgrf[dst_reg] + dst_reg_offset[i]; + src_var[i] = live_intervals->var_from_vgrf[src_reg] + i; if (!can_coalesce_vars(live_intervals, cfg, inst, - var_to[i], var_from[i])) { + dst_var[i], src_var[i])) { can_coalesce = false; - reg_from = -1; + src_reg = -1; break; } } @@ -242,31 +251,31 @@ fs_visitor::register_coalesce() foreach_block_and_inst(block, fs_inst, scan_inst, cfg) { if (scan_inst->dst.file == GRF && - scan_inst->dst.reg == reg_from) { - scan_inst->dst.reg = reg_to; + scan_inst->dst.reg == src_reg) { + scan_inst->dst.reg = dst_reg; scan_inst->dst.reg_offset = - reg_to_offset[scan_inst->dst.reg_offset]; + dst_reg_offset[scan_inst->dst.reg_offset]; } for (int j = 0; j < scan_inst->sources; j++) { if (scan_inst->src[j].file == GRF && - scan_inst->src[j].reg == reg_from) { - scan_inst->src[j].reg = reg_to; + scan_inst->src[j].reg == src_reg) { + scan_inst->src[j].reg = dst_reg; scan_inst->src[j].reg_offset = - reg_to_offset[scan_inst->src[j].reg_offset]; + dst_reg_offset[scan_inst->src[j].reg_offset]; } } } for (int i = 0; i < src_size; i++) { - live_intervals->start[var_to[i]] = - MIN2(live_intervals->start[var_to[i]], - live_intervals->start[var_from[i]]); - live_intervals->end[var_to[i]] = - MAX2(live_intervals->end[var_to[i]], - live_intervals->end[var_from[i]]); + live_intervals->start[dst_var[i]] = + MIN2(live_intervals->start[dst_var[i]], + live_intervals->start[src_var[i]]); + live_intervals->end[dst_var[i]] = + MAX2(live_intervals->end[dst_var[i]], + live_intervals->end[src_var[i]]); } - reg_from = -1; + src_reg = -1; } if (progress) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp index 50e0acd05f5..727e8d1b82a 100644 --- 
a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp @@ -686,7 +686,7 @@ namespace { if (is_signed) bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c), fs_reg(-(int)scale(widths[c] - s) - 1), - BRW_CONDITIONAL_G); + BRW_CONDITIONAL_GE); } } @@ -717,7 +717,7 @@ namespace { if (is_signed) bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c), fs_reg(-1.0f), - BRW_CONDITIONAL_G); + BRW_CONDITIONAL_GE); } } return dst; @@ -741,7 +741,7 @@ namespace { /* Clamp the normalized floating-point argument. */ if (is_signed) { bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c), - fs_reg(-1.0f), BRW_CONDITIONAL_G); + fs_reg(-1.0f), BRW_CONDITIONAL_GE); bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c), fs_reg(1.0f), BRW_CONDITIONAL_L); @@ -812,7 +812,7 @@ namespace { /* Clamp to the minimum value. */ if (widths[c] < 16) bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c), - fs_reg(0.0f), BRW_CONDITIONAL_G); + fs_reg(0.0f), BRW_CONDITIONAL_GE); /* Convert to 16-bit floating-point. */ bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c)); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 111db8c4323..504673f8bd9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -441,95 +441,6 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components, } } -/** - * Try to replace IF/MOV/ELSE/MOV/ENDIF with SEL. - * - * Many GLSL shaders contain the following pattern: - * - * x = condition ? foo : bar - * - * The compiler emits an ir_if tree for this, since each subexpression might be - * a complex tree that could have side-effects or short-circuit logic. - * - * However, the common case is to simply select one of two constants or - * variable values---which is exactly what SEL is for. 
In this case, the - * assembly looks like: - * - * (+f0) IF - * MOV dst src0 - * ELSE - * MOV dst src1 - * ENDIF - * - * which can be easily translated into: - * - * (+f0) SEL dst src0 src1 - * - * If src0 is an immediate value, we promote it to a temporary GRF. - */ -bool -fs_visitor::try_replace_with_sel() -{ - fs_inst *endif_inst = (fs_inst *) instructions.get_tail(); - assert(endif_inst->opcode == BRW_OPCODE_ENDIF); - - /* Pattern match in reverse: IF, MOV, ELSE, MOV, ENDIF. */ - int opcodes[] = { - BRW_OPCODE_IF, BRW_OPCODE_MOV, BRW_OPCODE_ELSE, BRW_OPCODE_MOV, - }; - - fs_inst *match = (fs_inst *) endif_inst->prev; - for (int i = 0; i < 4; i++) { - if (match->is_head_sentinel() || match->opcode != opcodes[4-i-1]) - return false; - match = (fs_inst *) match->prev; - } - - /* The opcodes match; it looks like the right sequence of instructions. */ - fs_inst *else_mov = (fs_inst *) endif_inst->prev; - fs_inst *then_mov = (fs_inst *) else_mov->prev->prev; - fs_inst *if_inst = (fs_inst *) then_mov->prev; - - /* Check that the MOVs are the right form. */ - if (then_mov->dst.equals(else_mov->dst) && - !then_mov->is_partial_write() && - !else_mov->is_partial_write()) { - - /* Remove the matched instructions; we'll emit a SEL to replace them. */ - while (!if_inst->next->is_tail_sentinel()) - if_inst->next->exec_node::remove(); - if_inst->exec_node::remove(); - - /* Only the last source register can be a constant, so if the MOV in - * the "then" clause uses a constant, we need to put it in a temporary. 
- */ - fs_reg src0(then_mov->src[0]); - if (src0.file == IMM) { - src0 = vgrf(glsl_type::float_type); - src0.type = then_mov->src[0].type; - bld.MOV(src0, then_mov->src[0]); - } - - if (if_inst->conditional_mod) { - /* Sandybridge-specific IF with embedded comparison */ - bld.CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod); - set_predicate(BRW_PREDICATE_NORMAL, - bld.emit(BRW_OPCODE_SEL, then_mov->dst, - src0, else_mov->src[0])); - } else { - /* Separate CMP and IF instructions */ - set_predicate_inv(if_inst->predicate, if_inst->predicate_inverse, - bld.emit(BRW_OPCODE_SEL, then_mov->dst, - src0, else_mov->src[0])); - } - - return true; - } - - return false; -} - /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ void fs_visitor::emit_dummy_fs() diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 0276d47c4d4..4c8602a1085 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -63,6 +63,8 @@ nir_optimize(nir_shader *nir, bool is_scalar) nir_validate_shader(nir); progress |= nir_opt_remove_phis(nir); nir_validate_shader(nir); + progress |= nir_opt_undef(nir); + nir_validate_shader(nir); } while (progress); } diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index a33fd88a026..97fff60f3e5 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -307,6 +307,34 @@ const struct surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) SF( x, x, x, x, x, x, x, x, x, R8G8B8_UINT) SF( x, x, x, x, x, x, x, x, x, R8G8B8_SINT) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_FLT16) + SF(80, 80, x, x, x, x, x, x, 
x, ASTC_LDR_2D_6x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_FLT16) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_U8sRGB) }; #undef x #undef Y @@ -503,6 +531,35 @@ brw_format_for_mesa_format(mesa_format mesa_format) [MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT] = BRW_SURFACEFORMAT_BC6H_SF16, [MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT] = BRW_SURFACEFORMAT_BC6H_UF16, + [MESA_FORMAT_RGBA_ASTC_4x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_FLT16, + [MESA_FORMAT_RGBA_ASTC_5x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_FLT16, + [MESA_FORMAT_RGBA_ASTC_5x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_FLT16, + [MESA_FORMAT_RGBA_ASTC_6x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_FLT16, + [MESA_FORMAT_RGBA_ASTC_6x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_FLT16, + [MESA_FORMAT_RGBA_ASTC_8x5] = 
BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_FLT16, + [MESA_FORMAT_RGBA_ASTC_8x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_FLT16, + [MESA_FORMAT_RGBA_ASTC_8x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_FLT16, + [MESA_FORMAT_RGBA_ASTC_10x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_FLT16, + [MESA_FORMAT_RGBA_ASTC_10x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_FLT16, + [MESA_FORMAT_RGBA_ASTC_10x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_FLT16, + [MESA_FORMAT_RGBA_ASTC_10x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_FLT16, + [MESA_FORMAT_RGBA_ASTC_12x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_FLT16, + [MESA_FORMAT_RGBA_ASTC_12x12] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_FLT16, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_U8sRGB, + [MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_U8sRGB, + [MESA_FORMAT_A_SNORM8] = 0, [MESA_FORMAT_L_SNORM8] = 0, [MESA_FORMAT_L8A8_SNORM] = 0, @@ -768,6 +825,36 @@ translate_tex_format(struct brw_context *brw, } return 
brw_format_for_mesa_format(mesa_format); + case MESA_FORMAT_RGBA_ASTC_4x4: + case MESA_FORMAT_RGBA_ASTC_5x4: + case MESA_FORMAT_RGBA_ASTC_5x5: + case MESA_FORMAT_RGBA_ASTC_6x5: + case MESA_FORMAT_RGBA_ASTC_6x6: + case MESA_FORMAT_RGBA_ASTC_8x5: + case MESA_FORMAT_RGBA_ASTC_8x6: + case MESA_FORMAT_RGBA_ASTC_8x8: + case MESA_FORMAT_RGBA_ASTC_10x5: + case MESA_FORMAT_RGBA_ASTC_10x6: + case MESA_FORMAT_RGBA_ASTC_10x8: + case MESA_FORMAT_RGBA_ASTC_10x10: + case MESA_FORMAT_RGBA_ASTC_12x10: + case MESA_FORMAT_RGBA_ASTC_12x12: { + GLuint brw_fmt = brw_format_for_mesa_format(mesa_format); + + /** + * On Gen9+, it is possible to process these formats using the LDR + * Profile or the Full Profile mode of the hardware. Because, it isn't + * possible to determine if an HDR or LDR texture is being rendered, we + * can't determine which mode to enable in the hardware. Therefore, to + * handle all cases, always default to Full profile unless we are + * processing sRGBs, which are incompatible with this mode. + */ + if (brw->gen >= 9) + brw_fmt |= GEN9_SURFACE_ASTC_HDR_FORMAT_BIT; + + return brw_fmt; + } + default: assert(brw_format_for_mesa_format(mesa_format) != 0); return brw_format_for_mesa_format(mesa_format); diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index e96732a1908..268b995f92e 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -123,12 +123,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw, return 16; /** - * From the "Alignment Unit Size" section of various specs, namely: - * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4 - * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4. 
- * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4 - * - BSpec (for Ivybridge and slight variations in separate stencil) - * * +----------------------------------------------------------------------+ * | | alignment unit width ("i") | * | Surface Property |-----------------------------| @@ -146,32 +140,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw, * On IVB+, non-special cases can be overridden by setting the SURFACE_STATE * "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8. */ - if (_mesa_is_format_compressed(mt->format)) { - /* The hardware alignment requirements for compressed textures - * happen to match the block boundaries. - */ - unsigned int i, j; - _mesa_get_format_block_size(mt->format, &i, &j); - - /* On Gen9+ we can pick our own alignment for compressed textures but it - * has to be a multiple of the block size. The minimum alignment we can - * pick is 4 so we effectively have to align to 4 times the block - * size - */ - if (brw->gen >= 9) - return i * 4; - else - return i; - } - - if (mt->format == MESA_FORMAT_S_UINT8) - return 8; - - if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) { - uint32_t align = tr_mode_horizontal_texture_alignment(brw, mt); - /* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32. */ - return align < 32 ? 32 : align; - } if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16) return 8; @@ -248,12 +216,6 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw, const struct intel_mipmap_tree *mt) { /** - * From the "Alignment Unit Size" section of various specs, namely: - * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4 - * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4. 
- * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4 - * - BSpec (for Ivybridge and slight variations in separate stencil) - * * +----------------------------------------------------------------------+ * | | alignment unit height ("j") | * | Surface Property |-----------------------------| @@ -270,18 +232,6 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw, * Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of * the SURFACE_STATE "Surface Vertical Alignment" field. */ - if (_mesa_is_format_compressed(mt->format)) - /* See comment above for the horizontal alignment */ - return brw->gen >= 9 ? 16 : 4; - - if (mt->format == MESA_FORMAT_S_UINT8) - return brw->gen >= 7 ? 8 : 4; - - if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) { - uint32_t align = tr_mode_vertical_texture_alignment(brw, mt); - /* XY_FAST_COPY_BLT doesn't support vertical alignment < 64 */ - return align < 64 ? 64 : align; - } /* Broadwell only supports VALIGN of 4, 8, and 16. The BSpec says 4 * should always be used, except for stencil buffers, which should be 8. @@ -367,7 +317,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) mt->total_width = mt->physical_width0; if (mt->compressed) - mt->total_width = ALIGN(mt->total_width, bw); + mt->total_width = ALIGN_NPOT(mt->total_width, bw); /* May need to adjust width to accommodate the placement of * the 2nd mipmap. 
This occurs when the alignment @@ -378,10 +328,10 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) unsigned mip1_width; if (mt->compressed) { - mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) + - ALIGN(minify(mt->physical_width0, 2), bw); + mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) + + ALIGN_NPOT(minify(mt->physical_width0, 2), bw); } else { - mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) + + mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) + minify(mt->physical_width0, 2); } @@ -390,6 +340,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) } } + mt->total_width /= bw; mt->total_height = 0; for (unsigned level = mt->first_level; level <= mt->last_level; level++) { @@ -397,7 +348,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) intel_miptree_set_level_info(mt, level, x, y, depth); - img_height = ALIGN(height, mt->align_h); + img_height = ALIGN_NPOT(height, mt->align_h); if (mt->compressed) img_height /= bh; @@ -414,7 +365,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) /* Layout_below: step right after second mipmap. 
*/ if (level == mt->first_level + 1) { - x += ALIGN(width, mt->align_w); + x += ALIGN_NPOT(width, mt->align_w) / bw; } else { y += img_height; } @@ -434,7 +385,7 @@ brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw, { if ((brw->gen < 9 && mt->target == GL_TEXTURE_3D) || (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) { - return ALIGN(minify(mt->physical_width0, level), mt->align_w); + return ALIGN_NPOT(minify(mt->physical_width0, level), mt->align_w); } else { return 0; } @@ -475,11 +426,11 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw, } else if (mt->target == GL_TEXTURE_3D || (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP) || mt->array_layout == ALL_SLICES_AT_EACH_LOD) { - return ALIGN(minify(mt->physical_height0, level), mt->align_h); + return ALIGN_NPOT(minify(mt->physical_height0, level), mt->align_h); } else { - const unsigned h0 = ALIGN(mt->physical_height0, mt->align_h); - const unsigned h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h); + const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->align_h); + const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->align_h); return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h; } @@ -551,7 +502,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw, for (unsigned level = mt->first_level; level <= mt->last_level; level++) { unsigned img_height; - img_height = ALIGN(height, mt->align_h); + img_height = ALIGN_NPOT(height, mt->align_h); if (mt->compressed) img_height /= mt->align_h; @@ -574,18 +525,20 @@ static void brw_miptree_layout_texture_3d(struct brw_context *brw, struct intel_mipmap_tree *mt) { - unsigned yscale = mt->compressed ? 
4 : 1; - mt->total_width = 0; mt->total_height = 0; unsigned ysum = 0; + unsigned bh, bw; + + _mesa_get_format_block_size(mt->format, &bw, &bh); + for (unsigned level = mt->first_level; level <= mt->last_level; level++) { unsigned WL = MAX2(mt->physical_width0 >> level, 1); unsigned HL = MAX2(mt->physical_height0 >> level, 1); unsigned DL = MAX2(mt->physical_depth0 >> level, 1); - unsigned wL = ALIGN(WL, mt->align_w); - unsigned hL = ALIGN(HL, mt->align_h); + unsigned wL = ALIGN_NPOT(WL, mt->align_w); + unsigned hL = ALIGN_NPOT(HL, mt->align_h); if (mt->target == GL_TEXTURE_CUBE_MAP) DL = 6; @@ -596,9 +549,9 @@ brw_miptree_layout_texture_3d(struct brw_context *brw, unsigned x = (q % (1 << level)) * wL; unsigned y = ysum + (q >> level) * hL; - intel_miptree_set_image_offset(mt, level, q, x, y / yscale); - mt->total_width = MAX2(mt->total_width, x + wL); - mt->total_height = MAX2(mt->total_height, (y + hL) / yscale); + intel_miptree_set_image_offset(mt, level, q, x / bw, y / bh); + mt->total_width = MAX2(mt->total_width, (x + wL) / bw); + mt->total_height = MAX2(mt->total_height, (y + hL) / bh); } ysum += ALIGN(DL, 1 << level) / (1 << level) * hL; @@ -767,6 +720,13 @@ intel_miptree_set_alignment(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t layout_flags) { + /** + * From the "Alignment Unit Size" section of various specs, namely: + * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4 + * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4. + * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4 + * - BSpec (for Ivybridge and slight variations in separate stencil) + */ bool gen6_hiz_or_stencil = false; if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) { @@ -798,6 +758,29 @@ intel_miptree_set_alignment(struct brw_context *brw, mt->align_w = 128 / mt->cpp; mt->align_h = 32; } + } else if (mt->compressed) { + /* The hardware alignment requirements for compressed textures + * happen to match the block boundaries. 
+ */ + _mesa_get_format_block_size(mt->format, &mt->align_w, &mt->align_h); + + /* On Gen9+ we can pick our own alignment for compressed textures but it + * has to be a multiple of the block size. The minimum alignment we can + * pick is 4 so we effectively have to align to 4 times the block + * size + */ + if (brw->gen >= 9) { + mt->align_w *= 4; + mt->align_h *= 4; + } + } else if (mt->format == MESA_FORMAT_S_UINT8) { + mt->align_w = 8; + mt->align_h = brw->gen >= 7 ? 8 : 4; + } else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) { + /* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or + * vertical alignment < 64. */ + mt->align_w = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32); + mt->align_h = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64); } else { mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt, layout_flags); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 63f75da7e99..5e528b5c5a1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -216,8 +216,9 @@ dst_reg::equals(const dst_reg &r) const writemask == r.writemask && (reladdr == r.reladdr || (reladdr && r.reladdr && reladdr->equals(*r.reladdr))) && - memcmp(&fixed_hw_reg, &r.fixed_hw_reg, - sizeof(fixed_hw_reg)) == 0); + ((file != HW_REG && file != IMM) || + memcmp(&fixed_hw_reg, &r.fixed_hw_reg, + sizeof(fixed_hw_reg)) == 0)); } bool diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 6c4d3e197a5..d2f333fd4dd 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -238,6 +238,20 @@ gen8_emit_texture_surface_state(struct brw_context *brw, surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES; } + /* From the CHV PRM, Volume 2d, page 321 (RENDER_SURFACE_STATE dword 0 + * bit 9 "Sampler L2 Bypass Mode Disable" Programming Notes): + * + * This bit must 
be set for the following surface types: BC2_UNORM + * BC3_UNORM BC5_UNORM BC5_SNORM BC7_UNORM + */ + if ((brw->gen >= 9 || brw->is_cherryview) && + (format == BRW_SURFACEFORMAT_BC2_UNORM || + format == BRW_SURFACEFORMAT_BC3_UNORM || + format == BRW_SURFACEFORMAT_BC5_UNORM || + format == BRW_SURFACEFORMAT_BC5_SNORM || + format == BRW_SURFACEFORMAT_BC7_UNORM)) + surf[0] |= GEN8_SURFACE_SAMPLER_L2_BYPASS_DISABLE; + if (_mesa_is_array_texture(target) || target == GL_TEXTURE_CUBE_MAP) surf[0] |= GEN8_SURFACE_IS_ARRAY; diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c index 3706704bf1a..ac2738f59a0 100644 --- a/src/mesa/drivers/dri/i965/intel_copy_image.c +++ b/src/mesa/drivers/dri/i965/intel_copy_image.c @@ -41,7 +41,6 @@ copy_image_with_blitter(struct brw_context *brw, { GLuint bw, bh; uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y; - int cpp; /* The blitter doesn't understand multisampling at all. */ if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) @@ -86,16 +85,6 @@ copy_image_with_blitter(struct brw_context *brw, src_y /= (int)bh; src_width /= (int)bw; src_height /= (int)bh; - - /* Inside of the miptree, the x offsets are stored in pixels while - * the y offsets are stored in blocks. We need to scale just the x - * offset. - */ - src_image_x /= bw; - - cpp = _mesa_get_format_bytes(src_mt->format); - } else { - cpp = src_mt->cpp; } src_x += src_image_x; src_y += src_image_y; @@ -111,18 +100,12 @@ copy_image_with_blitter(struct brw_context *brw, dst_x /= (int)bw; dst_y /= (int)bh; - - /* Inside of the miptree, the x offsets are stored in pixels while - * the y offsets are stored in blocks. We need to scale just the x - * offset. 
- */ - dst_image_x /= bw; } dst_x += dst_image_x; dst_y += dst_image_y; return intelEmitCopyBlit(brw, - cpp, + src_mt->cpp, src_mt->pitch, src_mt->bo, src_mt->offset, src_mt->tiling, diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index 58f41bfd55d..6bd55d395b2 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -68,7 +68,7 @@ static const struct dri_debug_control debug_control[] = { { "optimizer", DEBUG_OPTIMIZER }, { "ann", DEBUG_ANNOTATION }, { "no8", DEBUG_NO8 }, - { "vec4vs", DEBUG_VEC4VS }, + { "vec4", DEBUG_VEC4VS }, { "spill", DEBUG_SPILL }, { "cs", DEBUG_CS }, { NULL, 0 } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index e85c3f00c7b..0bcbbbcde8f 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -313,15 +313,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; mt->disable_aux_buffers = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0; exec_list_make_empty(&mt->hiz_map); - - /* The cpp is bytes per (1, blockheight)-sized block for compressed - * textures. 
This is why you'll see divides by blockheight all over - */ - unsigned bw, bh; - _mesa_get_format_block_size(format, &bw, &bh); - assert(_mesa_get_format_bytes(mt->format) % bw == 0); - mt->cpp = _mesa_get_format_bytes(mt->format) / bw; - + mt->cpp = _mesa_get_format_bytes(format); mt->num_samples = num_samples; mt->compressed = _mesa_is_format_compressed(format); mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE; @@ -1272,8 +1264,8 @@ intel_miptree_copy_slice(struct brw_context *brw, if (dst_mt->compressed) { unsigned int i, j; _mesa_get_format_block_size(dst_mt->format, &i, &j); - height = ALIGN(height, j) / j; - width = ALIGN(width, i); + height = ALIGN_NPOT(height, j) / j; + width = ALIGN_NPOT(width, i) / i; } /* If it's a packed depth/stencil buffer with separate stencil, the blit @@ -2105,7 +2097,9 @@ intel_miptree_map_gtt(struct brw_context *brw, */ _mesa_get_format_block_size(mt->format, &bw, &bh); assert(y % bh == 0); + assert(x % bw == 0); y /= bh; + x /= bw; base = intel_miptree_map_raw(brw, mt) + mt->offset; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 790d3129207..c28162a1983 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -390,7 +390,7 @@ struct intel_mipmap_tree */ GLuint physical_width0, physical_height0, physical_depth0; - GLuint cpp; /**< bytes per pixel */ + GLuint cpp; /**< bytes per pixel (or bytes per block if compressed) */ GLuint num_samples; bool compressed; diff --git a/src/mesa/drivers/dri/i965/intel_upload.c b/src/mesa/drivers/dri/i965/intel_upload.c index 870aabc8863..deaae6c7ed5 100644 --- a/src/mesa/drivers/dri/i965/intel_upload.c +++ b/src/mesa/drivers/dri/i965/intel_upload.c @@ -44,12 +44,6 @@ #define INTEL_UPLOAD_SIZE (64*1024) -/** - * Like ALIGN(), but works with a non-power-of-two alignment. 
- */ -#define ALIGN_NPOT(value, alignment) \ - (((value) + (alignment) - 1) / (alignment) * (alignment)) - void intel_upload_finish(struct brw_context *brw) { diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index 4a3c231e36f..a57d5baeafd 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -336,12 +336,15 @@ static const struct extension extension_table[] = { { "GL_OES_texture_half_float", o(OES_texture_half_float), ES2, 2005 }, { "GL_OES_texture_half_float_linear", o(OES_texture_half_float_linear), ES2, 2005 }, { "GL_OES_texture_mirrored_repeat", o(dummy_true), ES1, 2005 }, + { "GL_OES_texture_storage_multisample_2d_array",o(ARB_texture_multisample), ES31, 2014 }, { "GL_OES_texture_npot", o(ARB_texture_non_power_of_two), ES1 | ES2, 2005 }, { "GL_OES_vertex_array_object", o(dummy_true), ES1 | ES2, 2010 }, /* KHR extensions */ { "GL_KHR_debug", o(dummy_true), GL, 2012 }, { "GL_KHR_context_flush_control", o(dummy_true), GL | ES2, 2014 }, + { "GL_KHR_texture_compression_astc_hdr", o(KHR_texture_compression_astc_hdr), GL | ES2, 2012 }, + { "GL_KHR_texture_compression_astc_ldr", o(KHR_texture_compression_astc_ldr), GL | ES2, 2012 }, /* Vendor extensions */ { "GL_3DFX_texture_compression_FXT1", o(TDFX_texture_compression_FXT1), GL, 1999 }, diff --git a/src/mesa/main/format_info.py b/src/mesa/main/format_info.py index 839d4073c61..22eb5a734a6 100644 --- a/src/mesa/main/format_info.py +++ b/src/mesa/main/format_info.py @@ -122,6 +122,9 @@ def get_channel_bits(fmat, chan_name): elif fmat.layout == 'bptc': bits = 16 if fmat.name.endswith('_FLOAT') else 8 return bits if fmat.has_channel(chan_name) else 0 + elif fmat.layout == 'astc': + bits = 16 if 'RGBA' in fmat.name else 8 + return bits if fmat.has_channel(chan_name) else 0 else: assert False else: diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index 34a4434c3ba..587221ca5a0 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -197,6 +197,7 @@ 
_mesa_get_format_max_bits(mesa_format format) * MESA_FORMAT_LAYOUT_ETC1 * MESA_FORMAT_LAYOUT_ETC2 * MESA_FORMAT_LAYOUT_BPTC + * MESA_FORMAT_LAYOUT_ASTC * MESA_FORMAT_LAYOUT_OTHER */ extern enum mesa_format_layout @@ -663,6 +664,48 @@ _mesa_get_srgb_format_linear(mesa_format format) case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM: format = MESA_FORMAT_BPTC_RGBA_UNORM; break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4: + format = MESA_FORMAT_RGBA_ASTC_4x4; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4: + format = MESA_FORMAT_RGBA_ASTC_5x4; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5: + format = MESA_FORMAT_RGBA_ASTC_5x5; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5: + format = MESA_FORMAT_RGBA_ASTC_6x5; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6: + format = MESA_FORMAT_RGBA_ASTC_6x6; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5: + format = MESA_FORMAT_RGBA_ASTC_8x5; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6: + format = MESA_FORMAT_RGBA_ASTC_8x6; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8: + format = MESA_FORMAT_RGBA_ASTC_8x8; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5: + format = MESA_FORMAT_RGBA_ASTC_10x5; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6: + format = MESA_FORMAT_RGBA_ASTC_10x6; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8: + format = MESA_FORMAT_RGBA_ASTC_10x8; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10: + format = MESA_FORMAT_RGBA_ASTC_10x10; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10: + format = MESA_FORMAT_RGBA_ASTC_12x10; + break; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12: + format = MESA_FORMAT_RGBA_ASTC_12x12; + break; case MESA_FORMAT_B8G8R8X8_SRGB: format = MESA_FORMAT_B8G8R8X8_UNORM; break; diff --git a/src/mesa/main/formats.csv b/src/mesa/main/formats.csv index e159e7dd6aa..80729d98787 100644 --- a/src/mesa/main/formats.csv +++ b/src/mesa/main/formats.csv @@ -301,3 +301,34 @@ MESA_FORMAT_BPTC_RGBA_UNORM , bptc , 4, 4, x128, , , MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM , bptc , 4, 4, 
x128, , , , xyzw, srgb MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT , bptc , 4, 4, x128, , , , xyz1, rgb MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT , bptc , 4, 4, x128, , , , xyz1, rgb + +# ASTC compressed formats +MESA_FORMAT_RGBA_ASTC_4x4 , astc , 4, 4, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_5x4 , astc , 5, 4, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_5x5 , astc , 5, 5, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_6x5 , astc , 6, 5, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_6x6 , astc , 6, 6, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_8x5 , astc , 8, 5, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_8x6 , astc , 8, 6, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_8x8 , astc , 8, 8, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_10x5 , astc ,10, 5, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_10x6 , astc ,10, 6, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_10x8 , astc ,10, 8, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_10x10 , astc ,10,10, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_12x10 , astc ,12,10, x128, , , , xyzw, rgb +MESA_FORMAT_RGBA_ASTC_12x12 , astc ,12,12, x128, , , , xyzw, rgb + +MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4 , astc , 4, 4, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4 , astc , 5, 4, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5 , astc , 5, 5, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5 , astc , 6, 5, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6 , astc , 6, 6, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5 , astc , 8, 5, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6 , astc , 8, 6, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8 , astc , 8, 8, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5 , astc ,10, 5, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6 , astc ,10, 6, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8 , astc ,10, 8, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10 , astc ,10,10, x128, , , , xyzw, srgb 
+MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10 , astc ,12,10, x128, , , , xyzw, srgb +MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12 , astc ,12,12, x128, , , , xyzw, srgb diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h index 4936fa0d482..ccb09b263ff 100644 --- a/src/mesa/main/formats.h +++ b/src/mesa/main/formats.h @@ -70,6 +70,7 @@ enum mesa_format_layout { MESA_FORMAT_LAYOUT_ETC1, MESA_FORMAT_LAYOUT_ETC2, MESA_FORMAT_LAYOUT_BPTC, + MESA_FORMAT_LAYOUT_ASTC, MESA_FORMAT_LAYOUT_OTHER, }; @@ -586,6 +587,36 @@ typedef enum MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT, MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT, + /* ASTC compressed formats */ + MESA_FORMAT_RGBA_ASTC_4x4, + MESA_FORMAT_RGBA_ASTC_5x4, + MESA_FORMAT_RGBA_ASTC_5x5, + MESA_FORMAT_RGBA_ASTC_6x5, + MESA_FORMAT_RGBA_ASTC_6x6, + MESA_FORMAT_RGBA_ASTC_8x5, + MESA_FORMAT_RGBA_ASTC_8x6, + MESA_FORMAT_RGBA_ASTC_8x8, + MESA_FORMAT_RGBA_ASTC_10x5, + MESA_FORMAT_RGBA_ASTC_10x6, + MESA_FORMAT_RGBA_ASTC_10x8, + MESA_FORMAT_RGBA_ASTC_10x10, + MESA_FORMAT_RGBA_ASTC_12x10, + MESA_FORMAT_RGBA_ASTC_12x12, + + MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10, + MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12, MESA_FORMAT_COUNT } mesa_format; diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c index c18f9d5223f..4ec8385ec2f 100644 --- a/src/mesa/main/genmipmap.c +++ b/src/mesa/main/genmipmap.c @@ -111,6 +111,7 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx, if (_mesa_is_enum_format_integer(srcImage->InternalFormat) || _mesa_is_depthstencil_format(srcImage->InternalFormat) || + 
_mesa_is_astc_format(srcImage->InternalFormat) || _mesa_is_stencil_format(srcImage->InternalFormat)) { _mesa_unlock_texture(ctx, texObj); _mesa_error(ctx, GL_INVALID_OPERATION, diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 680576cab8f..4855187aa6f 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -35,6 +35,7 @@ #include "mtypes.h" #include "state.h" #include "texcompress.h" +#include "texstate.h" #include "framebuffer.h" #include "samplerobj.h" #include "stencil.h" @@ -993,16 +994,7 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu { struct gl_sampler_object *samp = ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler; - - /* - * The sampler object may have been deleted on another context, - * so we try to lookup the sampler object before returning its Name. - */ - if (samp && _mesa_lookup_samplerobj(ctx, samp->Name)) { - v->value_int = samp->Name; - } else { - v->value_int = 0; - } + v->value_int = samp ? samp->Name : 0; } break; /* GL_ARB_uniform_buffer_object */ @@ -1750,6 +1742,52 @@ _mesa_GetDoublev(GLenum pname, GLdouble *params) } } +/** + * Convert a GL texture binding enum such as GL_TEXTURE_BINDING_2D + * into the corresponding Mesa texture target index. + * \return TEXTURE_x_INDEX or -1 if binding is invalid + */ +static int +tex_binding_to_index(const struct gl_context *ctx, GLenum binding) +{ + switch (binding) { + case GL_TEXTURE_BINDING_1D: + return _mesa_is_desktop_gl(ctx) ? TEXTURE_1D_INDEX : -1; + case GL_TEXTURE_BINDING_2D: + return TEXTURE_2D_INDEX; + case GL_TEXTURE_BINDING_3D: + return ctx->API != API_OPENGLES ? TEXTURE_3D_INDEX : -1; + case GL_TEXTURE_BINDING_CUBE_MAP: + return ctx->Extensions.ARB_texture_cube_map + ? TEXTURE_CUBE_INDEX : -1; + case GL_TEXTURE_BINDING_RECTANGLE: + return _mesa_is_desktop_gl(ctx) && ctx->Extensions.NV_texture_rectangle + ? 
TEXTURE_RECT_INDEX : -1; + case GL_TEXTURE_BINDING_1D_ARRAY: + return _mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array + ? TEXTURE_1D_ARRAY_INDEX : -1; + case GL_TEXTURE_BINDING_2D_ARRAY: + return (_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array) + || _mesa_is_gles3(ctx) + ? TEXTURE_2D_ARRAY_INDEX : -1; + case GL_TEXTURE_BINDING_BUFFER: + return ctx->API == API_OPENGL_CORE && + ctx->Extensions.ARB_texture_buffer_object ? + TEXTURE_BUFFER_INDEX : -1; + case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY: + return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_cube_map_array + ? TEXTURE_CUBE_ARRAY_INDEX : -1; + case GL_TEXTURE_BINDING_2D_MULTISAMPLE: + return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample + ? TEXTURE_2D_MULTISAMPLE_INDEX : -1; + case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY: + return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample + ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : -1; + default: + return -1; + } +} + static enum value_type find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v) { @@ -2013,6 +2051,45 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v) v->value_int = ctx->ImageUnits[index].Format; return TYPE_INT; + /* ARB_direct_state_access */ + case GL_TEXTURE_BINDING_1D: + case GL_TEXTURE_BINDING_1D_ARRAY: + case GL_TEXTURE_BINDING_2D: + case GL_TEXTURE_BINDING_2D_ARRAY: + case GL_TEXTURE_BINDING_2D_MULTISAMPLE: + case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY: + case GL_TEXTURE_BINDING_3D: + case GL_TEXTURE_BINDING_BUFFER: + case GL_TEXTURE_BINDING_CUBE_MAP: + case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY: + case GL_TEXTURE_BINDING_RECTANGLE: { + int target; + + if (ctx->API != API_OPENGL_CORE) + goto invalid_enum; + target = tex_binding_to_index(ctx, pname); + if (target < 0) + goto invalid_enum; + if (index >= _mesa_max_tex_unit(ctx)) + goto invalid_value; + + v->value_int = ctx->Texture.Unit[index].CurrentTex[target]->Name; + 
return TYPE_INT; + } + + case GL_SAMPLER_BINDING: { + struct gl_sampler_object *samp; + + if (ctx->API != API_OPENGL_CORE) + goto invalid_enum; + if (index >= _mesa_max_tex_unit(ctx)) + goto invalid_value; + + samp = ctx->Texture.Unit[index].Sampler; + v->value_int = samp ? samp->Name : 0; + return TYPE_INT; + } + case GL_MAX_COMPUTE_WORK_GROUP_COUNT: if (!_mesa_has_compute_shaders(ctx)) goto invalid_enum; diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 73213f407f3..b3c337e9d45 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -434,6 +434,9 @@ descriptor=[ [ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample" ], [ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample" ], +# GL_ARB_texture_multisample / ES 3.1 with GL_OES_texture_storage_multisample_2d_array + [ "TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX, extra_ARB_texture_multisample" ], + # GL_ARB_texture_gather / GLES 3.1 [ "MIN_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MinProgramTextureGatherOffset), extra_ARB_texture_gather"], [ "MAX_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MaxProgramTextureGatherOffset), extra_ARB_texture_gather"], @@ -740,9 +743,6 @@ descriptor=[ [ "TEXTURE_BUFFER_FORMAT_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ], [ "TEXTURE_BUFFER_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ], -# GL_ARB_texture_multisample / GL 3.2 - [ "TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX, extra_ARB_texture_multisample" ], - # GL 3.0 [ "CONTEXT_FLAGS", "CONTEXT_INT(Const.ContextFlags), extra_version_30" ], diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index 3eb66dab7f8..ce66699db8f 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -820,6 +820,47 @@ _mesa_is_enum_format_signed_int(GLenum 
format) } } +/** + * Test if the given format is an ASTC format. + */ +GLboolean +_mesa_is_astc_format(GLenum internalFormat) +{ + switch (internalFormat) { + case GL_COMPRESSED_RGBA_ASTC_4x4_KHR: + case GL_COMPRESSED_RGBA_ASTC_5x4_KHR: + case GL_COMPRESSED_RGBA_ASTC_5x5_KHR: + case GL_COMPRESSED_RGBA_ASTC_6x5_KHR: + case GL_COMPRESSED_RGBA_ASTC_6x6_KHR: + case GL_COMPRESSED_RGBA_ASTC_8x5_KHR: + case GL_COMPRESSED_RGBA_ASTC_8x6_KHR: + case GL_COMPRESSED_RGBA_ASTC_8x8_KHR: + case GL_COMPRESSED_RGBA_ASTC_10x5_KHR: + case GL_COMPRESSED_RGBA_ASTC_10x6_KHR: + case GL_COMPRESSED_RGBA_ASTC_10x8_KHR: + case GL_COMPRESSED_RGBA_ASTC_10x10_KHR: + case GL_COMPRESSED_RGBA_ASTC_12x10_KHR: + case GL_COMPRESSED_RGBA_ASTC_12x12_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR: + return true; + default: + return false; + } +} + /** * Test if the given format is an integer (non-normalized) format. 
@@ -1262,6 +1303,35 @@ _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format) case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT: return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_compression_bptc; + case GL_COMPRESSED_RGBA_ASTC_4x4_KHR: + case GL_COMPRESSED_RGBA_ASTC_5x4_KHR: + case GL_COMPRESSED_RGBA_ASTC_5x5_KHR: + case GL_COMPRESSED_RGBA_ASTC_6x5_KHR: + case GL_COMPRESSED_RGBA_ASTC_6x6_KHR: + case GL_COMPRESSED_RGBA_ASTC_8x5_KHR: + case GL_COMPRESSED_RGBA_ASTC_8x6_KHR: + case GL_COMPRESSED_RGBA_ASTC_8x8_KHR: + case GL_COMPRESSED_RGBA_ASTC_10x5_KHR: + case GL_COMPRESSED_RGBA_ASTC_10x6_KHR: + case GL_COMPRESSED_RGBA_ASTC_10x8_KHR: + case GL_COMPRESSED_RGBA_ASTC_10x10_KHR: + case GL_COMPRESSED_RGBA_ASTC_12x10_KHR: + case GL_COMPRESSED_RGBA_ASTC_12x12_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR: + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR: + return ctx->Extensions.KHR_texture_compression_astc_ldr; case GL_PALETTE4_RGB8_OES: case GL_PALETTE4_RGBA8_OES: case GL_PALETTE4_R5_G6_B5_OES: diff --git a/src/mesa/main/glformats.h b/src/mesa/main/glformats.h index 419955a6033..aec905d2342 100644 --- a/src/mesa/main/glformats.h +++ b/src/mesa/main/glformats.h @@ -56,6 +56,9 @@ _mesa_bytes_per_pixel( GLenum format, GLenum type ); extern GLint _mesa_bytes_per_vertex_attrib(GLint comps, GLenum type); +extern GLboolean +_mesa_is_astc_format(GLenum internalFormat); + extern 
GLboolean _mesa_is_type_unsigned(GLenum type); diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h index 54df50c9cfe..ed207d44a64 100644 --- a/src/mesa/main/macros.h +++ b/src/mesa/main/macros.h @@ -690,7 +690,22 @@ minify(unsigned value, unsigned levels) * * \sa ROUND_DOWN_TO() */ -#define ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1)) +static inline uintptr_t +ALIGN(uintptr_t value, int32_t alignment) +{ + assert((alignment > 0) && _mesa_is_pow_two(alignment)); + return (((value) + (alignment) - 1) & ~((alignment) - 1)); +} + +/** + * Like ALIGN(), but works with a non-power-of-two alignment. + */ +static inline uintptr_t +ALIGN_NPOT(uintptr_t value, int32_t alignment) +{ + assert(alignment > 0); + return (value + alignment - 1) / alignment * alignment; +} /** * Align a value down to an alignment value @@ -703,7 +718,12 @@ minify(unsigned value, unsigned levels) * * \sa ALIGN() */ -#define ROUND_DOWN_TO(value, alignment) ((value) & ~(alignment - 1)) +static inline uintptr_t +ROUND_DOWN_TO(uintptr_t value, int32_t alignment) +{ + assert((alignment > 0) && _mesa_is_pow_two(alignment)); + return ((value) & ~(alignment - 1)); +} /** Cross product of two 3-element vectors */ diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 4883cbc93d5..4e88494c387 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3751,6 +3751,8 @@ struct gl_extensions GLboolean ATI_fragment_shader; GLboolean ATI_separate_stencil; GLboolean INTEL_performance_query; + GLboolean KHR_texture_compression_astc_hdr; + GLboolean KHR_texture_compression_astc_ldr; GLboolean MESA_pack_invert; GLboolean MESA_ycbcr_texture; GLboolean NV_conditional_render; diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 59107eb67b1..b941f3e522e 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2480,5 +2480,8 @@ const struct function 
gles31_functions_possible[] = { { "glVertexAttribBinding", 31, -1 }, { "glVertexBindingDivisor", 31, -1 }, + /* GL_OES_texture_storage_multisample_2d_array */ + { "glTexStorage3DMultisampleOES", 31, -1 }, + { NULL, 0, -1 }, }; diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index edfb03625c2..394c8bab214 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -229,6 +229,28 @@ _mesa_gl_compressed_format_base_format(GLenum format) * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and * GL_COMPRESSED_TEXTURE_FORMATS return." * + * The KHR_texture_compression_astc_hdr spec says: + * + * "Interactions with OpenGL 4.2 + * + * OpenGL 4.2 supports the feature that compressed textures can be + * compressed online, by passing the compressed texture format enum as + * the internal format when uploading a texture using TexImage1D, + * TexImage2D or TexImage3D (see Section 3.9.3, Texture Image + * Specification, subsection Encoding of Special Internal Formats). + * + * Due to the complexity of the ASTC compression algorithm, it is not + * usually suitable for online use, and therefore ASTC support will be + * limited to pre-compressed textures only. Where on-device compression + * is required, a domain-specific limited compressor will typically + * be used, and this is therefore not suitable for implementation in + * the driver. + * + * In particular, the ASTC format specifiers will not be added to + * Table 3.14, and thus will not be accepted by the TexImage*D + * functions, and will not be returned by the (already deprecated) + * COMPRESSED_TEXTURE_FORMATS query." + * * There is no formal spec for GL_ATI_texture_compression_3dc. 
Since the * formats added by this extension are luminance-alpha formats, it is * reasonable to expect them to follow the same rules as @@ -378,15 +400,15 @@ _mesa_glenum_to_compressed_format(GLenum format) case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: case GL_RGB_S3TC: + case GL_RGB4_S3TC: return MESA_FORMAT_RGB_DXT1; case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - case GL_RGB4_S3TC: return MESA_FORMAT_RGBA_DXT1; case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: return MESA_FORMAT_RGBA_DXT3; case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - case GL_RGBA4_S3TC: return MESA_FORMAT_RGBA_DXT5; case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: @@ -449,6 +471,63 @@ _mesa_glenum_to_compressed_format(GLenum format) case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT: return MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT; + case GL_COMPRESSED_RGBA_ASTC_4x4_KHR: + return MESA_FORMAT_RGBA_ASTC_4x4; + case GL_COMPRESSED_RGBA_ASTC_5x4_KHR: + return MESA_FORMAT_RGBA_ASTC_5x4; + case GL_COMPRESSED_RGBA_ASTC_5x5_KHR: + return MESA_FORMAT_RGBA_ASTC_5x5; + case GL_COMPRESSED_RGBA_ASTC_6x5_KHR: + return MESA_FORMAT_RGBA_ASTC_6x5; + case GL_COMPRESSED_RGBA_ASTC_6x6_KHR: + return MESA_FORMAT_RGBA_ASTC_6x6; + case GL_COMPRESSED_RGBA_ASTC_8x5_KHR: + return MESA_FORMAT_RGBA_ASTC_8x5; + case GL_COMPRESSED_RGBA_ASTC_8x6_KHR: + return MESA_FORMAT_RGBA_ASTC_8x6; + case GL_COMPRESSED_RGBA_ASTC_8x8_KHR: + return MESA_FORMAT_RGBA_ASTC_8x8; + case GL_COMPRESSED_RGBA_ASTC_10x5_KHR: + return MESA_FORMAT_RGBA_ASTC_10x5; + case GL_COMPRESSED_RGBA_ASTC_10x6_KHR: + return MESA_FORMAT_RGBA_ASTC_10x6; + case GL_COMPRESSED_RGBA_ASTC_10x8_KHR: + return MESA_FORMAT_RGBA_ASTC_10x8; + case GL_COMPRESSED_RGBA_ASTC_10x10_KHR: + return MESA_FORMAT_RGBA_ASTC_10x10; + case GL_COMPRESSED_RGBA_ASTC_12x10_KHR: + return MESA_FORMAT_RGBA_ASTC_12x10; + case GL_COMPRESSED_RGBA_ASTC_12x12_KHR: + return MESA_FORMAT_RGBA_ASTC_12x12; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4; + case 
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10; + case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12; + default: return MESA_FORMAT_NONE; } @@ -539,6 +618,63 @@ _mesa_compressed_format_to_glenum(struct gl_context *ctx, mesa_format mesaFormat case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT: return GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; + case MESA_FORMAT_RGBA_ASTC_4x4: + return GL_COMPRESSED_RGBA_ASTC_4x4_KHR; + case MESA_FORMAT_RGBA_ASTC_5x4: + return GL_COMPRESSED_RGBA_ASTC_5x4_KHR; + case MESA_FORMAT_RGBA_ASTC_5x5: + return GL_COMPRESSED_RGBA_ASTC_5x5_KHR; + case MESA_FORMAT_RGBA_ASTC_6x5: + return GL_COMPRESSED_RGBA_ASTC_6x5_KHR; + case MESA_FORMAT_RGBA_ASTC_6x6: + return GL_COMPRESSED_RGBA_ASTC_6x6_KHR; + case MESA_FORMAT_RGBA_ASTC_8x5: + return GL_COMPRESSED_RGBA_ASTC_8x5_KHR; + case MESA_FORMAT_RGBA_ASTC_8x6: + return GL_COMPRESSED_RGBA_ASTC_8x6_KHR; + case 
MESA_FORMAT_RGBA_ASTC_8x8: + return GL_COMPRESSED_RGBA_ASTC_8x8_KHR; + case MESA_FORMAT_RGBA_ASTC_10x5: + return GL_COMPRESSED_RGBA_ASTC_10x5_KHR; + case MESA_FORMAT_RGBA_ASTC_10x6: + return GL_COMPRESSED_RGBA_ASTC_10x6_KHR; + case MESA_FORMAT_RGBA_ASTC_10x8: + return GL_COMPRESSED_RGBA_ASTC_10x8_KHR; + case MESA_FORMAT_RGBA_ASTC_10x10: + return GL_COMPRESSED_RGBA_ASTC_10x10_KHR; + case MESA_FORMAT_RGBA_ASTC_12x10: + return GL_COMPRESSED_RGBA_ASTC_12x10_KHR; + case MESA_FORMAT_RGBA_ASTC_12x12: + return GL_COMPRESSED_RGBA_ASTC_12x12_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12: + return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR; + default: _mesa_problem(ctx, "Unexpected mesa texture format in" " _mesa_compressed_format_to_glenum()"); diff --git a/src/mesa/main/texformat.c 
b/src/mesa/main/texformat.c index f4d17e1bdb5..fd9f335a767 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -38,6 +38,7 @@ #include "mtypes.h" #include "texcompress.h" #include "texformat.h" +#include "glformats.h" #define RETURN_IF_SUPPORTED(f) do { \ if (ctx->TextureFormatSupported[f]) \ @@ -276,87 +277,6 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target, RETURN_IF_SUPPORTED(MESA_FORMAT_YCBCR_REV); break; - /* For non-generic compressed format we assert two things: - * - * 1. The format has already been validated against the set of available - * extensions. - * - * 2. The driver only enables the extension if it supports all of the - * formats that are part of that extension. - */ - case GL_COMPRESSED_RGB_FXT1_3DFX: - return MESA_FORMAT_RGB_FXT1; - case GL_COMPRESSED_RGBA_FXT1_3DFX: - return MESA_FORMAT_RGBA_FXT1; - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - return MESA_FORMAT_RGB_DXT1; - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return MESA_FORMAT_RGBA_DXT1; - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - return MESA_FORMAT_RGBA_DXT3; - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return MESA_FORMAT_RGBA_DXT5; - case GL_COMPRESSED_RED_RGTC1: - return MESA_FORMAT_R_RGTC1_UNORM; - case GL_COMPRESSED_SIGNED_RED_RGTC1: - return MESA_FORMAT_R_RGTC1_SNORM; - case GL_COMPRESSED_RG_RGTC2: - return MESA_FORMAT_RG_RGTC2_UNORM; - case GL_COMPRESSED_SIGNED_RG_RGTC2: - return MESA_FORMAT_RG_RGTC2_SNORM; - case GL_COMPRESSED_LUMINANCE_LATC1_EXT: - return MESA_FORMAT_L_LATC1_UNORM; - case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: - return MESA_FORMAT_L_LATC1_SNORM; - case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: - return MESA_FORMAT_LA_LATC2_UNORM; - case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: - return MESA_FORMAT_LA_LATC2_SNORM; - case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI: - return MESA_FORMAT_LA_LATC2_UNORM; - case GL_ETC1_RGB8_OES: - return 
MESA_FORMAT_ETC1_RGB8; - case GL_COMPRESSED_RGB8_ETC2: - return MESA_FORMAT_ETC2_RGB8; - case GL_COMPRESSED_SRGB8_ETC2: - return MESA_FORMAT_ETC2_SRGB8; - case GL_COMPRESSED_RGBA8_ETC2_EAC: - return MESA_FORMAT_ETC2_RGBA8_EAC; - case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC: - return MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC; - case GL_COMPRESSED_R11_EAC: - return MESA_FORMAT_ETC2_R11_EAC; - case GL_COMPRESSED_RG11_EAC: - return MESA_FORMAT_ETC2_RG11_EAC; - case GL_COMPRESSED_SIGNED_R11_EAC: - return MESA_FORMAT_ETC2_SIGNED_R11_EAC; - case GL_COMPRESSED_SIGNED_RG11_EAC: - return MESA_FORMAT_ETC2_SIGNED_RG11_EAC; - case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: - return MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1; - case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: - return MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1; - case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: - return MESA_FORMAT_SRGB_DXT1; - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: - return MESA_FORMAT_SRGBA_DXT1; - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: - return MESA_FORMAT_SRGBA_DXT3; - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: - return MESA_FORMAT_SRGBA_DXT5; - case GL_COMPRESSED_RGBA_BPTC_UNORM: - return MESA_FORMAT_BPTC_RGBA_UNORM; - case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM: - return MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM; - case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT: - return MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT; - case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT: - return MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT; - case GL_ALPHA16F_ARB: RETURN_IF_SUPPORTED(MESA_FORMAT_A_FLOAT16); RETURN_IF_SUPPORTED(MESA_FORMAT_A_FLOAT32); @@ -844,6 +764,18 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target, case GL_BGRA: RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8A8_UNORM); break; + + default: + /* For non-generic compressed format we assert two things: + * + * 1. The format has already been validated against the set of available + * extensions. + * + * 2. 
The driver only enables the extension if it supports all of the + * formats that are part of that extension. + */ + if (_mesa_is_compressed_format(ctx, internalFormat)) + return _mesa_glenum_to_compressed_format(internalFormat); } _mesa_problem(ctx, "unexpected format %s in _mesa_choose_tex_format()", diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 274ecad44e9..bfb0858b9bb 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -565,6 +565,10 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat ) } } + if (ctx->Extensions.KHR_texture_compression_astc_ldr && + _mesa_is_astc_format(internalFormat)) + return GL_RGBA; + if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) { switch (internalFormat) { case GL_COMPRESSED_RGB8_ETC2: @@ -1763,7 +1767,7 @@ _mesa_test_proxy_teximage(struct gl_context *ctx, GLenum target, GLint level, /** * Return true if the format is only valid for glCompressedTexImage. */ -static GLboolean +static bool compressedteximage_only_format(const struct gl_context *ctx, GLenum format) { switch (format) { @@ -1778,12 +1782,21 @@ compressedteximage_only_format(const struct gl_context *ctx, GLenum format) case GL_PALETTE8_R5_G6_B5_OES: case GL_PALETTE8_RGBA4_OES: case GL_PALETTE8_RGB5_A1_OES: - return GL_TRUE; + return true; default: - return GL_FALSE; + return false; } } +/** + * Return true if the format doesn't support online compression. 
+ */ +static bool +_mesa_format_no_online_compression(const struct gl_context *ctx, GLenum format) +{ + return _mesa_is_astc_format(format) || + compressedteximage_only_format(ctx, format); +} /* Writes to an GL error pointer if non-null and returns whether or not the * error is GL_NO_ERROR */ @@ -1845,19 +1858,68 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, return write_error(error, GL_INVALID_OPERATION); target_can_be_compresed = ctx->Extensions.ARB_texture_cube_map_array; - break; - case GL_TEXTURE_3D: - /* See ETC2/EAC comment in switch case GL_TEXTURE_CUBE_MAP_ARRAY. */ - if (layout == MESA_FORMAT_LAYOUT_ETC2 && _mesa_is_gles3(ctx)) + /* From the KHR_texture_compression_astc_hdr spec: + * + * Add a second new column "3D Tex." which is empty for all non-ASTC + * formats. If only the LDR profile is supported by the + * implementation, this column is also empty for all ASTC formats. If + * both the LDR and HDR profiles are supported only, this column is + * checked for all ASTC formats. + * + * Add a third new column "Cube Map Array Tex." which is empty for all + * non-ASTC formats, and checked for all ASTC formats. + * + * and, + * + * 'An INVALID_OPERATION error is generated by CompressedTexImage3D + * if <internalformat> is TEXTURE_CUBE_MAP_ARRAY and the + * "Cube Map Array" column of table 8.19 is *not* checked, or if + * <internalformat> is TEXTURE_3D and the "3D Tex." column of table + * 8.19 is *not* checked' + * + * The instances of <internalformat> above should say <target>. + */ + + /* Throw an INVALID_OPERATION error if the target is + * TEXTURE_CUBE_MAP_ARRAY and the format is not ASTC. + */ + if (target_can_be_compresed && + ctx->Extensions.KHR_texture_compression_astc_ldr && + layout != MESA_FORMAT_LAYOUT_ASTC) return write_error(error, GL_INVALID_OPERATION); - if (layout == MESA_FORMAT_LAYOUT_BPTC) { + break; + case GL_TEXTURE_3D: + switch (layout) { + case MESA_FORMAT_LAYOUT_ETC2: + /* See ETC2/EAC comment in case GL_TEXTURE_CUBE_MAP_ARRAY.
*/ + if (_mesa_is_gles3(ctx)) + return write_error(error, GL_INVALID_OPERATION); + break; + case MESA_FORMAT_LAYOUT_BPTC: target_can_be_compresed = ctx->Extensions.ARB_texture_compression_bptc; break; - } + case MESA_FORMAT_LAYOUT_ASTC: + target_can_be_compresed = + ctx->Extensions.KHR_texture_compression_astc_hdr; - break; + /* Throw an INVALID_OPERATION error if the target is TEXTURE_3D and + * the hdr extension is not supported. + * See comment in switch case GL_TEXTURE_CUBE_MAP_ARRAY for more info. + */ + if (!target_can_be_compresed) + return write_error(error, GL_INVALID_OPERATION); + break; + default: + /* Throw an INVALID_OPERATION error if the target is TEXTURE_3D and + * the format is not ASTC. + * See comment in switch case GL_TEXTURE_CUBE_MAP_ARRAY for more info. + */ + if (ctx->Extensions.KHR_texture_compression_astc_ldr) + return write_error(error, GL_INVALID_OPERATION); + break; + } default: break; } @@ -2328,7 +2390,7 @@ texture_error_check( struct gl_context *ctx, "glTexImage%dD(target can't be compressed)", dimensions); return GL_TRUE; } - if (compressedteximage_only_format(ctx, internalFormat)) { + if (_mesa_format_no_online_compression(ctx, internalFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glTexImage%dD(no compression for format)", dimensions); return GL_TRUE; @@ -2592,7 +2654,7 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions, } if (_mesa_is_format_compressed(texImage->TexFormat)) { - if (compressedteximage_only_format(ctx, texImage->InternalFormat)) { + if (_mesa_format_no_online_compression(ctx, texImage->InternalFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(no compression for format)", callerName); return GL_TRUE; @@ -2850,7 +2912,7 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, "glCopyTexImage%dD(target can't be compressed)", dimensions); return GL_TRUE; } - if (compressedteximage_only_format(ctx, internalFormat)) { + if (_mesa_format_no_online_compression(ctx,
internalFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glCopyTexImage%dD(no compression for format)", dimensions); return GL_TRUE; @@ -2931,7 +2993,7 @@ copytexsubimage_error_check(struct gl_context *ctx, GLuint dimensions, } if (_mesa_is_format_compressed(texImage->TexFormat)) { - if (compressedteximage_only_format(ctx, texImage->InternalFormat)) { + if (_mesa_format_no_online_compression(ctx, texImage->InternalFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(no compression for format)", caller); return GL_TRUE; @@ -3702,7 +3764,7 @@ texturesubimage(struct gl_context *ctx, GLuint dims, /* Must handle special case GL_TEXTURE_CUBE_MAP. */ if (texObj->Target == GL_TEXTURE_CUBE_MAP) { - GLint rowStride; + GLint imageStride; /* * What do we do if the user created a texture with the following code @@ -3740,18 +3802,18 @@ texturesubimage(struct gl_context *ctx, GLuint dims, return; } - rowStride = _mesa_image_image_stride(&ctx->Unpack, width, height, - format, type); + imageStride = _mesa_image_image_stride(&ctx->Unpack, width, height, + format, type); /* Copy in each face. */ - for (i = 0; i < 6; ++i) { + for (i = zoffset; i < zoffset + depth; ++i) { texImage = texObj->Image[i][level]; assert(texImage); _mesa_texture_sub_image(ctx, 3, texObj, texImage, texObj->Target, - level, xoffset, yoffset, zoffset, + level, xoffset, yoffset, 0, width, height, 1, format, type, pixels, true); - pixels = (GLubyte *) pixels + rowStride; + pixels = (GLubyte *) pixels + imageStride; } } else { diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index c5d83e145a6..a1be1e33042 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -1612,7 +1612,8 @@ _mesa_tex_target_to_index(const struct gl_context *ctx, GLenum target) return ((_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample) || _mesa_is_gles31(ctx)) ? 
TEXTURE_2D_MULTISAMPLE_INDEX: -1; case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample + return ((_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample) || + _mesa_is_gles31(ctx)) ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: -1; default: return -1; diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 72d36117498..89f286cc05e 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1223,6 +1223,7 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target, case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: return ctx->Extensions.ARB_texture_cube_map; case GL_TEXTURE_2D_MULTISAMPLE: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return ctx->Extensions.ARB_texture_multisample; } @@ -1267,7 +1268,6 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target, * "target may also be TEXTURE_BUFFER, indicating the texture buffer." */ return ctx->API == API_OPENGL_CORE && ctx->Version >= 31; - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: case GL_PROXY_TEXTURE_2D_MULTISAMPLE: case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY: return ctx->Extensions.ARB_texture_multisample; @@ -1926,6 +1926,12 @@ get_tex_parameterfv(struct gl_context *ctx, *params = (GLfloat) obj->ImageFormatCompatibilityType; break; + case GL_TEXTURE_TARGET: + if (ctx->API != API_OPENGL_CORE) + goto invalid_pname; + *params = ENUM_TO_FLOAT(obj->Target); + break; + default: goto invalid_pname; } @@ -2151,6 +2157,12 @@ get_tex_parameteriv(struct gl_context *ctx, *params = obj->ImageFormatCompatibilityType; break; + case GL_TEXTURE_TARGET: + if (ctx->API != API_OPENGL_CORE) + goto invalid_pname; + *params = (GLint) obj->Target; + break; + default: goto invalid_pname; } diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index d96b7bc8782..fccd16fc8c0 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -1108,7 +1108,7 @@ prog_to_nir(const struct 
gl_program *prog, c->build.shader = s; c->build.impl = impl; - nir_builder_insert_after_cf_list(&c->build, &impl->body); + c->build.cursor = nir_after_cf_list(&impl->body); setup_registers_and_variables(c); if (unlikely(c->error)) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index cba98819718..6c9f9477a17 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -262,6 +262,7 @@ public: int dead_mask; /**< Used in dead code elimination */ class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ + const struct tgsi_opcode_info *info; }; class variable_storage : public exec_node { @@ -335,6 +336,11 @@ struct array_decl { unsigned array_size; }; +struct rename_reg_pair { + int old_reg; + int new_reg; +}; + struct glsl_to_tgsi_visitor : public ir_visitor { public: glsl_to_tgsi_visitor(); @@ -478,11 +484,10 @@ public: void simplify_cmp(void); - void rename_temp_register(int index, int new_index); - int get_first_temp_read(int index); - int get_first_temp_write(int index); - int get_last_temp_read(int index); - int get_last_temp_write(int index); + void rename_temp_registers(int num_renames, struct rename_reg_pair *renames); + void get_first_temp_read(int *first_reads); + void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes); + void get_last_temp_write(int *last_writes); void copy_propagate(void); int eliminate_dead_code(void); @@ -530,25 +535,16 @@ swizzle_for_size(int size) return size_swizzles[size - 1]; } -static bool -is_tex_instruction(unsigned opcode) +static unsigned +num_inst_dst_regs(const glsl_to_tgsi_instruction *op) { - const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); - return info->is_tex; + return op->info->num_dst; } static unsigned -num_inst_dst_regs(unsigned opcode) +num_inst_src_regs(const glsl_to_tgsi_instruction *op) { - const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); - 
return info->num_dst; -} - -static unsigned -num_inst_src_regs(unsigned opcode) -{ - const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); - return info->is_tex ? info->num_src - 1 : info->num_src; + return op->info->is_tex ? op->info->num_src - 1 : op->info->num_src; } glsl_to_tgsi_instruction * @@ -592,6 +588,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, assert(num_reladdr == 0); inst->op = op; + inst->info = tgsi_get_opcode_info(op); inst->dst[0] = dst; inst->dst[1] = dst1; inst->src[0] = src0; @@ -1123,6 +1120,34 @@ type_size(const struct glsl_type *type) return 0; } + +/** + * If the given GLSL type is an array or matrix or a structure containing + * an array/matrix member, return true. Else return false. + * + * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY + * or PROGRAM_ARRAY) should be used for variables of this type. Anytime + * we have an array that might be indexed with a variable, we need to use + * the latter storage type. + */ +static bool +type_has_array_or_matrix(const glsl_type *type) +{ + if (type->is_array() || type->is_matrix()) + return true; + + if (type->is_record()) { + for (unsigned i = 0; i < type->length; i++) { + if (type_has_array_or_matrix(type->fields.structure[i].type)) { + return true; + } + } + } + + return false; +} + + /** * In the initial pass of codegen, we assign temporary numbers to * intermediate results.
(not SSA -- variable assignments will reuse @@ -1137,9 +1162,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) src.reladdr = NULL; src.negate = 0; - if (!options->EmitNoIndirectTemp && - (type->is_array() || type->is_matrix())) { - + if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) { if (next_array >= max_num_arrays) { max_num_arrays += 32; array_sizes = (unsigned*) @@ -3538,7 +3561,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) v->samplers_used = 0; foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { - if (is_tex_instruction(inst->op)) { + if (inst->info->is_tex) { for (int i = 0; i < inst->sampler_array_size; i++) { unsigned idx = inst->sampler.index + i; v->samplers_used |= 1 << idx; @@ -3668,51 +3691,52 @@ glsl_to_tgsi_visitor::simplify_cmp(void) /* Replaces all references to a temporary register index with another index. */ void -glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) +glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames) { foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { unsigned j; - - for (j = 0; j < num_inst_src_regs(inst->op); j++) { - if (inst->src[j].file == PROGRAM_TEMPORARY && - inst->src[j].index == index) { - inst->src[j].index = new_index; - } + int k; + for (j = 0; j < num_inst_src_regs(inst); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY) + for (k = 0; k < num_renames; k++) + if (inst->src[j].index == renames[k].old_reg) + inst->src[j].index = renames[k].new_reg; } for (j = 0; j < inst->tex_offset_num_offset; j++) { - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && - inst->tex_offsets[j].index == index) { - inst->tex_offsets[j].index = new_index; - } + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) + for (k = 0; k < num_renames; k++) + if (inst->tex_offsets[j].index == renames[k].old_reg) + inst->tex_offsets[j].index = renames[k].new_reg; } - for (j = 0; j < 
num_inst_dst_regs(inst->op); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) { - inst->dst[j].index = new_index; - } + for (j = 0; j < num_inst_dst_regs(inst); j++) { + if (inst->dst[j].file == PROGRAM_TEMPORARY) + for (k = 0; k < num_renames; k++) + if (inst->dst[j].index == renames[k].old_reg) + inst->dst[j].index = renames[k].new_reg; } } } -int -glsl_to_tgsi_visitor::get_first_temp_read(int index) +void +glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads) { int depth = 0; /* loop depth */ int loop_start = -1; /* index of the first active BGNLOOP (if any) */ unsigned i = 0, j; foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - for (j = 0; j < num_inst_src_regs(inst->op); j++) { - if (inst->src[j].file == PROGRAM_TEMPORARY && - inst->src[j].index == index) { - return (depth == 0) ? i : loop_start; + for (j = 0; j < num_inst_src_regs(inst); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY) { + if (first_reads[inst->src[j].index] == -1) + first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start; } } for (j = 0; j < inst->tex_offset_num_offset; j++) { - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && - inst->tex_offsets[j].index == index) { - return (depth == 0) ? i : loop_start; + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) { + if (first_reads[inst->tex_offsets[j].index] == -1) + first_reads[inst->tex_offsets[j].index] = (depth == 0) ? 
i : loop_start; } } if (inst->op == TGSI_OPCODE_BGNLOOP) { @@ -3725,91 +3749,73 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) assert(depth >= 0); i++; } - return -1; } -int -glsl_to_tgsi_visitor::get_first_temp_write(int index) +void +glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes) { int depth = 0; /* loop depth */ int loop_start = -1; /* index of the first active BGNLOOP (if any) */ - int i = 0; - unsigned j; - + unsigned i = 0, j; + int k; foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - for (j = 0; j < num_inst_dst_regs(inst->op); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) { - return (depth == 0) ? i : loop_start; - } + for (j = 0; j < num_inst_src_regs(inst); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY) + last_reads[inst->src[j].index] = (depth == 0) ? i : -2; + } + for (j = 0; j < num_inst_dst_regs(inst); j++) { + if (inst->dst[j].file == PROGRAM_TEMPORARY) + if (first_writes[inst->dst[j].index] == -1) + first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; + } + for (j = 0; j < inst->tex_offset_num_offset; j++) { + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) + last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2; } if (inst->op == TGSI_OPCODE_BGNLOOP) { if(depth++ == 0) loop_start = i; } else if (inst->op == TGSI_OPCODE_ENDLOOP) { - if (--depth == 0) + if (--depth == 0) { loop_start = -1; - } - assert(depth >= 0); - i++; - } - return -1; -} - -int -glsl_to_tgsi_visitor::get_last_temp_read(int index) -{ - int depth = 0; /* loop depth */ - int last = -1; /* index of last instruction that reads the temporary */ - unsigned i = 0, j; - - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - for (j = 0; j < num_inst_src_regs(inst->op); j++) { - if (inst->src[j].file == PROGRAM_TEMPORARY && - inst->src[j].index == index) { - last = (depth == 0) ? 
i : -2; + for (k = 0; k < this->next_temp; k++) { + if (last_reads[k] == -2) { + last_reads[k] = i; + } + } } } - for (j = 0; j < inst->tex_offset_num_offset; j++) { - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && - inst->tex_offsets[j].index == index) - last = (depth == 0) ? i : -2; - } - if (inst->op == TGSI_OPCODE_BGNLOOP) - depth++; - else if (inst->op == TGSI_OPCODE_ENDLOOP) - if (--depth == 0 && last == -2) - last = i; assert(depth >= 0); i++; } - assert(last >= -1); - return last; } -int -glsl_to_tgsi_visitor::get_last_temp_write(int index) +void +glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes) { int depth = 0; /* loop depth */ - int last = -1; /* index of last instruction that writes to the temporary */ - int i = 0; + int i = 0, k; unsigned j; foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - for (j = 0; j < num_inst_dst_regs(inst->op); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) - last = (depth == 0) ? i : -2; + for (j = 0; j < num_inst_dst_regs(inst); j++) { + if (inst->dst[j].file == PROGRAM_TEMPORARY) + last_writes[inst->dst[j].index] = (depth == 0) ? 
i : -2; } if (inst->op == TGSI_OPCODE_BGNLOOP) depth++; else if (inst->op == TGSI_OPCODE_ENDLOOP) - if (--depth == 0 && last == -2) - last = i; + if (--depth == 0) { + for (k = 0; k < this->next_temp; k++) { + if (last_writes[k] == -2) { + last_writes[k] = i; + } + } + } assert(depth >= 0); i++; } - assert(last >= -1); - return last; } /* @@ -4193,7 +4199,7 @@ glsl_to_tgsi_visitor::merge_two_dsts(void) foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) { glsl_to_tgsi_instruction *inst2; bool merged; - if (num_inst_dst_regs(inst->op) != 2) + if (num_inst_dst_regs(inst) != 2) continue; if (inst->dst[0].file != PROGRAM_UNDEFINED && @@ -4239,15 +4245,18 @@ glsl_to_tgsi_visitor::merge_registers(void) { int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); + struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); int i, j; + int num_renames = 0; /* Read the indices of the last read and first write to each temp register * into an array so that we don't have to traverse the instruction list as * much. */ for (i = 0; i < this->next_temp; i++) { - last_reads[i] = get_last_temp_read(i); - first_writes[i] = get_first_temp_write(i); + last_reads[i] = -1; + first_writes[i] = -1; } + get_last_temp_read_first_temp_write(last_reads, first_writes); /* Start looking for registers with non-overlapping usages that can be * merged together. */ @@ -4265,7 +4274,9 @@ glsl_to_tgsi_visitor::merge_registers(void) * as the register at index j. 
*/ if (first_writes[i] <= first_writes[j] && last_reads[i] <= first_writes[j]) { - rename_temp_register(j, i); /* Replace all references to j with i.*/ + renames[num_renames].old_reg = j; + renames[num_renames].new_reg = i; + num_renames++; /* Update the first_writes and last_reads arrays with the new * values for the merged register index, and mark the newly unused @@ -4277,6 +4288,8 @@ glsl_to_tgsi_visitor::merge_registers(void) } } + rename_temp_registers(num_renames, renames); + ralloc_free(renames); ralloc_free(last_reads); ralloc_free(first_writes); } @@ -4288,15 +4301,28 @@ glsl_to_tgsi_visitor::renumber_registers(void) { int i = 0; int new_index = 0; + int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp); + struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); + int num_renames = 0; + for (i = 0; i < this->next_temp; i++) { + first_reads[i] = -1; + } + get_first_temp_read(first_reads); for (i = 0; i < this->next_temp; i++) { - if (get_first_temp_read(i) < 0) continue; - if (i != new_index) - rename_temp_register(i, new_index); + if (first_reads[i] < 0) continue; + if (i != new_index) { + renames[num_renames].old_reg = i; + renames[num_renames].new_reg = new_index; + num_renames++; + } new_index++; } + rename_temp_registers(num_renames, renames); this->next_temp = new_index; + ralloc_free(renames); + ralloc_free(first_reads); } /** @@ -4969,8 +4995,8 @@ compile_tgsi_instruction(struct st_translate *t, unsigned num_src; unsigned tex_target; - num_dst = num_inst_dst_regs(inst->op); - num_src = num_inst_src_regs(inst->op); + num_dst = num_inst_dst_regs(inst); + num_src = num_inst_src_regs(inst); for (i = 0; i < num_dst; i++) dst[i] = translate_dst(t, @@ -5771,14 +5797,31 @@ get_mesa_program(struct gl_context *ctx, #if 0 /* Print out some information (for debugging purposes) used by the * optimization passes. 
*/ - for (i = 0; i < v->next_temp; i++) { - int fr = v->get_first_temp_read(i); - int fw = v->get_first_temp_write(i); - int lr = v->get_last_temp_read(i); - int lw = v->get_last_temp_write(i); + { + int i; + int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); + int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); + int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); + int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); - printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); - assert(fw <= fr); + for (i = 0; i < v->next_temp; i++) { + first_writes[i] = -1; + first_reads[i] = -1; + last_writes[i] = -1; + last_reads[i] = -1; + } + v->get_first_temp_read(first_reads); + v->get_last_temp_read_first_temp_write(last_reads, first_writes); + v->get_last_temp_write(last_writes); + for (i = 0; i < v->next_temp; i++) + printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i], + first_writes[i], + last_reads[i], + last_writes[i]); + ralloc_free(first_writes); + ralloc_free(first_reads); + ralloc_free(last_writes); + ralloc_free(last_reads); } #endif @@ -5993,6 +6036,10 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16; + if (ctx->Extensions.ARB_gpu_shader5) + lower_inst |= LOWER_PACK_USE_BFI | + LOWER_PACK_USE_BFE; + lower_packing_builtins(ir, lower_inst); } diff --git a/src/mesa/swrast/s_texfetch.c b/src/mesa/swrast/s_texfetch.c index 1fe21c0b469..acb06e6ae92 100644 --- a/src/mesa/swrast/s_texfetch.c +++ b/src/mesa/swrast/s_texfetch.c @@ -116,6 +116,14 @@ static void fetch_null_texelf( const struct swrast_texture_image *texImage, NULL \ } +#define FETCH_COMPRESSED(NAME) \ + { \ + MESA_FORMAT_ ## NAME, \ + fetch_compressed, \ + fetch_compressed, \ + fetch_compressed \ + } + /** * Table to map MESA_FORMAT_ to texel fetch/store funcs. 
*/ @@ -344,214 +352,79 @@ texfetch_funcs[] = FETCH_NULL(RGBX_SINT32), /* DXT compressed formats */ - { - MESA_FORMAT_RGB_DXT1, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_RGBA_DXT1, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_RGBA_DXT3, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_RGBA_DXT5, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, + FETCH_COMPRESSED(RGB_DXT1), + FETCH_COMPRESSED(RGBA_DXT1), + FETCH_COMPRESSED(RGBA_DXT3), + FETCH_COMPRESSED(RGBA_DXT5), /* DXT sRGB compressed formats */ - { - MESA_FORMAT_SRGB_DXT1, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_SRGBA_DXT1, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_SRGBA_DXT3, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_SRGBA_DXT5, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, + FETCH_COMPRESSED(SRGB_DXT1), + FETCH_COMPRESSED(SRGBA_DXT1), + FETCH_COMPRESSED(SRGBA_DXT3), + FETCH_COMPRESSED(SRGBA_DXT5), /* FXT1 compressed formats */ - { - MESA_FORMAT_RGB_FXT1, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_RGBA_FXT1, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, + FETCH_COMPRESSED(RGB_FXT1), + FETCH_COMPRESSED(RGBA_FXT1), /* RGTC compressed formats */ - { - MESA_FORMAT_R_RGTC1_UNORM, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_R_RGTC1_SNORM, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_RG_RGTC2_UNORM, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_RG_RGTC2_SNORM, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, + FETCH_COMPRESSED(R_RGTC1_UNORM), + FETCH_COMPRESSED(R_RGTC1_SNORM), + FETCH_COMPRESSED(RG_RGTC2_UNORM), + FETCH_COMPRESSED(RG_RGTC2_SNORM), /* LATC1/2 compressed formats */ 
- { - MESA_FORMAT_L_LATC1_UNORM, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_L_LATC1_SNORM, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_LA_LATC2_UNORM, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_LA_LATC2_SNORM, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, + FETCH_COMPRESSED(L_LATC1_UNORM), + FETCH_COMPRESSED(L_LATC1_SNORM), + FETCH_COMPRESSED(LA_LATC2_UNORM), + FETCH_COMPRESSED(LA_LATC2_SNORM), /* ETC1/2 compressed formats */ - { - MESA_FORMAT_ETC1_RGB8, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_ETC2_RGB8, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_ETC2_SRGB8, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_ETC2_RGBA8_EAC, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_ETC2_R11_EAC, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_ETC2_RG11_EAC, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_ETC2_SIGNED_R11_EAC, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_ETC2_SIGNED_RG11_EAC, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_BPTC_RGBA_UNORM, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT, - fetch_compressed, - fetch_compressed, - fetch_compressed - }, - { - 
MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT, - fetch_compressed, - fetch_compressed, - fetch_compressed - } + FETCH_COMPRESSED(ETC1_RGB8), + FETCH_COMPRESSED(ETC2_RGB8), + FETCH_COMPRESSED(ETC2_SRGB8), + FETCH_COMPRESSED(ETC2_RGBA8_EAC), + FETCH_COMPRESSED(ETC2_SRGB8_ALPHA8_EAC), + FETCH_COMPRESSED(ETC2_R11_EAC), + FETCH_COMPRESSED(ETC2_RG11_EAC), + FETCH_COMPRESSED(ETC2_SIGNED_R11_EAC), + FETCH_COMPRESSED(ETC2_SIGNED_RG11_EAC), + FETCH_COMPRESSED(ETC2_RGB8_PUNCHTHROUGH_ALPHA1), + FETCH_COMPRESSED(ETC2_SRGB8_PUNCHTHROUGH_ALPHA1), + FETCH_COMPRESSED(BPTC_RGBA_UNORM), + FETCH_COMPRESSED(BPTC_SRGB_ALPHA_UNORM), + FETCH_COMPRESSED(BPTC_RGB_SIGNED_FLOAT), + FETCH_COMPRESSED(BPTC_RGB_UNSIGNED_FLOAT), + + /* ASTC compressed formats */ + FETCH_NULL(RGBA_ASTC_4x4), + FETCH_NULL(RGBA_ASTC_5x4), + FETCH_NULL(RGBA_ASTC_5x5), + FETCH_NULL(RGBA_ASTC_6x5), + FETCH_NULL(RGBA_ASTC_6x6), + FETCH_NULL(RGBA_ASTC_8x5), + FETCH_NULL(RGBA_ASTC_8x6), + FETCH_NULL(RGBA_ASTC_8x8), + FETCH_NULL(RGBA_ASTC_10x5), + FETCH_NULL(RGBA_ASTC_10x6), + FETCH_NULL(RGBA_ASTC_10x8), + FETCH_NULL(RGBA_ASTC_10x10), + FETCH_NULL(RGBA_ASTC_12x10), + FETCH_NULL(RGBA_ASTC_12x12), + FETCH_NULL(SRGB8_ALPHA8_ASTC_4x4), + FETCH_NULL(SRGB8_ALPHA8_ASTC_5x4), + FETCH_NULL(SRGB8_ALPHA8_ASTC_5x5), + FETCH_NULL(SRGB8_ALPHA8_ASTC_6x5), + FETCH_NULL(SRGB8_ALPHA8_ASTC_6x6), + FETCH_NULL(SRGB8_ALPHA8_ASTC_8x5), + FETCH_NULL(SRGB8_ALPHA8_ASTC_8x6), + FETCH_NULL(SRGB8_ALPHA8_ASTC_8x8), + FETCH_NULL(SRGB8_ALPHA8_ASTC_10x5), + FETCH_NULL(SRGB8_ALPHA8_ASTC_10x6), + FETCH_NULL(SRGB8_ALPHA8_ASTC_10x8), + FETCH_NULL(SRGB8_ALPHA8_ASTC_10x10), + FETCH_NULL(SRGB8_ALPHA8_ASTC_12x10), + FETCH_NULL(SRGB8_ALPHA8_ASTC_12x12) };