Merge remote-tracking branch 'mesa-public/master' into vulkan

This commit is contained in:
Jason Ekstrand 2015-08-31 16:30:07 -07:00
commit 0af4bf4d4b
106 changed files with 2063 additions and 1572 deletions

View file

@ -196,7 +196,7 @@ GL 4.5, GLSL 4.50:
GL_ARB_get_texture_sub_image DONE (all drivers)
GL_ARB_shader_texture_image_samples not started
GL_ARB_texture_barrier DONE (nv50, nvc0, r600, radeonsi)
GL_KHR_context_flush_control DONE (all - but needs GLX/EXT extension to be useful)
GL_KHR_context_flush_control DONE (all - but needs GLX/EGL extension to be useful)
GL_KHR_robust_buffer_access_behavior not started
GL_KHR_robustness 90% done (the ARB variant)
GL_EXT_shader_integer_mix DONE (all drivers that support GLSL)

View file

@ -65,24 +65,24 @@ struct ttn_compile {
nir_register *addr_reg;
/**
* Stack of cf_node_lists where instructions should be pushed as we pop
* Stack of nir_cursors where instructions should be pushed as we pop
* back out of the control flow stack.
*
* For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
* instructions should be placed, and if_stack[if_stack_pos - 1] has where
* the next instructions outside of the if/then/else block go.
*/
struct exec_list **if_stack;
nir_cursor *if_stack;
unsigned if_stack_pos;
/**
* Stack of cf_node_lists where instructions should be pushed as we pop
* Stack of nir_cursors where instructions should be pushed as we pop
* back out of the control flow stack.
*
* loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
* of the loop.
*/
struct exec_list **loop_stack;
nir_cursor *loop_stack;
unsigned loop_stack_pos;
/* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
@ -922,7 +922,7 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
nir_builder *b = &c->build;
/* Save the outside-of-the-if-statement node list. */
c->if_stack[c->if_stack_pos] = b->cf_node_list;
c->if_stack[c->if_stack_pos] = b->cursor;
c->if_stack_pos++;
src = ttn_channel(b, src, X);
@ -933,11 +933,11 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
} else {
if_stmt->condition = nir_src_for_ssa(nir_fne(b, src, nir_imm_int(b, 0)));
}
nir_cf_node_insert_end(b->cf_node_list, &if_stmt->cf_node);
nir_builder_cf_insert(b, &if_stmt->cf_node);
nir_builder_insert_after_cf_list(b, &if_stmt->then_list);
b->cursor = nir_after_cf_list(&if_stmt->then_list);
c->if_stack[c->if_stack_pos] = &if_stmt->else_list;
c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
c->if_stack_pos++;
}
@ -946,7 +946,7 @@ ttn_else(struct ttn_compile *c)
{
nir_builder *b = &c->build;
nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos - 1]);
b->cursor = c->if_stack[c->if_stack_pos - 1];
}
static void
@ -955,7 +955,7 @@ ttn_endif(struct ttn_compile *c)
nir_builder *b = &c->build;
c->if_stack_pos -= 2;
nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos]);
b->cursor = c->if_stack[c->if_stack_pos];
}
static void
@ -964,13 +964,13 @@ ttn_bgnloop(struct ttn_compile *c)
nir_builder *b = &c->build;
/* Save the outside-of-the-loop node list. */
c->loop_stack[c->loop_stack_pos] = b->cf_node_list;
c->loop_stack[c->loop_stack_pos] = b->cursor;
c->loop_stack_pos++;
nir_loop *loop = nir_loop_create(b->shader);
nir_cf_node_insert_end(b->cf_node_list, &loop->cf_node);
nir_builder_cf_insert(b, &loop->cf_node);
nir_builder_insert_after_cf_list(b, &loop->body);
b->cursor = nir_after_cf_list(&loop->body);
}
static void
@ -993,7 +993,7 @@ ttn_endloop(struct ttn_compile *c)
nir_builder *b = &c->build;
c->loop_stack_pos--;
nir_builder_insert_after_cf_list(b, c->loop_stack[c->loop_stack_pos]);
b->cursor = c->loop_stack[c->loop_stack_pos];
}
static void
@ -1803,7 +1803,7 @@ tgsi_to_nir(const void *tgsi_tokens,
nir_function_impl *impl = nir_function_impl_create(overload);
nir_builder_init(&c->build, impl);
nir_builder_insert_after_cf_list(&c->build, &impl->body);
c->build.cursor = nir_after_cf_list(&impl->body);
s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
s->num_uniforms = scan.const_file_max[0] + 1;
@ -1819,10 +1819,10 @@ tgsi_to_nir(const void *tgsi_tokens,
c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);
c->if_stack = rzalloc_array(c, struct exec_list *,
c->if_stack = rzalloc_array(c, nir_cursor,
(scan.opcode_count[TGSI_OPCODE_IF] +
scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
c->loop_stack = rzalloc_array(c, struct exec_list *,
c->loop_stack = rzalloc_array(c, nir_cursor,
scan.opcode_count[TGSI_OPCODE_BGNLOOP]);
ret = tgsi_parse_init(&parser, tgsi_tokens);

View file

@ -372,30 +372,28 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter,
*
* States not listed here are not affected by util_blitter. */
static inline
void util_blitter_save_blend(struct blitter_context *blitter,
void *state)
static inline void
util_blitter_save_blend(struct blitter_context *blitter, void *state)
{
blitter->saved_blend_state = state;
}
static inline
void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
void *state)
static inline void
util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
void *state)
{
blitter->saved_dsa_state = state;
}
static inline
void util_blitter_save_vertex_elements(struct blitter_context *blitter,
void *state)
static inline void
util_blitter_save_vertex_elements(struct blitter_context *blitter, void *state)
{
blitter->saved_velem_state = state;
}
static inline
void util_blitter_save_stencil_ref(struct blitter_context *blitter,
const struct pipe_stencil_ref *state)
static inline void
util_blitter_save_stencil_ref(struct blitter_context *blitter,
const struct pipe_stencil_ref *state)
{
blitter->saved_stencil_ref = *state;
}
@ -407,23 +405,20 @@ void util_blitter_save_rasterizer(struct blitter_context *blitter,
blitter->saved_rs_state = state;
}
static inline
void util_blitter_save_fragment_shader(struct blitter_context *blitter,
void *fs)
static inline void
util_blitter_save_fragment_shader(struct blitter_context *blitter, void *fs)
{
blitter->saved_fs = fs;
}
static inline
void util_blitter_save_vertex_shader(struct blitter_context *blitter,
void *vs)
static inline void
util_blitter_save_vertex_shader(struct blitter_context *blitter, void *vs)
{
blitter->saved_vs = vs;
}
static inline
void util_blitter_save_geometry_shader(struct blitter_context *blitter,
void *gs)
static inline void
util_blitter_save_geometry_shader(struct blitter_context *blitter, void *gs)
{
blitter->saved_gs = gs;
}
@ -442,24 +437,24 @@ util_blitter_save_tesseval_shader(struct blitter_context *blitter,
blitter->saved_tes = sh;
}
static inline
void util_blitter_save_framebuffer(struct blitter_context *blitter,
const struct pipe_framebuffer_state *state)
static inline void
util_blitter_save_framebuffer(struct blitter_context *blitter,
const struct pipe_framebuffer_state *state)
{
blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */
util_copy_framebuffer_state(&blitter->saved_fb_state, state);
}
static inline
void util_blitter_save_viewport(struct blitter_context *blitter,
struct pipe_viewport_state *state)
static inline void
util_blitter_save_viewport(struct blitter_context *blitter,
struct pipe_viewport_state *state)
{
blitter->saved_viewport = *state;
}
static inline
void util_blitter_save_scissor(struct blitter_context *blitter,
struct pipe_scissor_state *state)
static inline void
util_blitter_save_scissor(struct blitter_context *blitter,
struct pipe_scissor_state *state)
{
blitter->saved_scissor = *state;
}

View file

@ -41,6 +41,7 @@
#include "util/u_tile.h"
#include "util/u_prim.h"
#include "util/u_surface.h"
#include <inttypes.h>
#include <stdio.h>
#include <limits.h> /* CHAR_BIT */
@ -275,7 +276,7 @@ debug_get_flags_option(const char *name,
for (; flags->name; ++flags)
namealign = MAX2(namealign, strlen(flags->name));
for (flags = orig; flags->name; ++flags)
_debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
_debug_printf("| %*s [0x%0*"PRIu64"]%s%s\n", namealign, flags->name,
(int)sizeof(uint64_t)*CHAR_BIT/4, flags->value,
flags->desc ? " " : "", flags->desc ? flags->desc : "");
}
@ -290,9 +291,9 @@ debug_get_flags_option(const char *name,
if (debug_get_option_should_print()) {
if (str) {
debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
debug_printf("%s: %s = 0x%"PRIu64" (%s)\n", __FUNCTION__, name, result, str);
} else {
debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
debug_printf("%s: %s = 0x%"PRIu64"\n", __FUNCTION__, name, result);
}
}

View file

@ -680,6 +680,7 @@ static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460
#define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 0x00080000
#define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 0x00100000
#define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 0x00200000
#define A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000
#define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD 0x00800000
#define A3XX_GRAS_CL_CLIP_CNTL_WCOORD 0x01000000
#define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE 0x02000000

View file

@ -563,10 +563,29 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
/* TODO only use if prog doesn't use clipvertex/clipdist */
val |= MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6) << 26;
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, val);
}
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
uint32_t planes = ctx->rasterizer->clip_plane_enable;
int count = 0;
while (planes && count < 6) {
int i = ffs(planes) - 1;
planes &= ~(1U << i);
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4);
OUT_RING(ring, fui(ctx->ucp.ucp[i][0]));
OUT_RING(ring, fui(ctx->ucp.ucp[i][1]));
OUT_RING(ring, fui(ctx->ucp.ucp[i][2]));
OUT_RING(ring, fui(ctx->ucp.ucp[i][3]));
}
}
/* NOTE: since primitive_restart is not actually part of any
* state object, we need to make sure that we always emit
* PRIM_VTX_CNTL.. either that or be more clever and detect

View file

@ -65,7 +65,8 @@ fd3_rasterizer_state_create(struct pipe_context *pctx,
if (cso->multisample)
TODO
*/
so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */
so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER /* ??? */ |
COND(cso->clip_halfz, A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z);
so->gras_su_point_minmax =
A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);

View file

@ -334,6 +334,7 @@ struct fd_context {
FD_DIRTY_INDEXBUF = (1 << 16),
FD_DIRTY_SCISSOR = (1 << 17),
FD_DIRTY_STREAMOUT = (1 << 18),
FD_DIRTY_UCP = (1 << 19),
} dirty;
struct pipe_blend_state *blend;
@ -355,6 +356,7 @@ struct fd_context {
struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
struct pipe_index_buffer indexbuf;
struct fd_streamout_stateobj streamout;
struct pipe_clip_state ucp;
/* GMEM/tile handling fxns: */
void (*emit_tile_init)(struct fd_context *ctx);

View file

@ -191,6 +191,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 16383;
case PIPE_CAP_DEPTH_CLIP_DISABLE:
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return is_a3xx(screen);
@ -228,7 +229,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:

View file

@ -65,7 +65,9 @@ static void
fd_set_clip_state(struct pipe_context *pctx,
const struct pipe_clip_state *clip)
{
DBG("TODO: ");
struct fd_context *ctx = fd_context(pctx);
ctx->ucp = *clip;
ctx->dirty |= FD_DIRTY_UCP;
}
static void

View file

@ -172,7 +172,7 @@ flatten_block(nir_builder *bld, nir_block *if_block, nir_block *prev_block,
(intr->intrinsic == nir_intrinsic_discard_if)) {
nir_ssa_def *discard_cond;
nir_builder_insert_after_instr(bld,
bld->cursor = nir_after_instr(
nir_block_last_instr(prev_block));
if (invert) {

View file

@ -190,7 +190,7 @@ nv30_context_destroy(struct pipe_context *pipe)
} while(0)
struct pipe_context *
nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
{
struct nv30_screen *screen = nv30_screen(pscreen);
struct nv30_context *nv30 = CALLOC_STRUCT(nv30_context);

View file

@ -240,7 +240,7 @@ nv50_context_get_sample_position(struct pipe_context *, unsigned, unsigned,
float *);
struct pipe_context *
nv50_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
{
struct nv50_screen *screen = nv50_screen(pscreen);
struct nv50_context *nv50;

View file

@ -262,7 +262,7 @@ nvc0_context_get_sample_position(struct pipe_context *, unsigned, unsigned,
float *);
struct pipe_context *
nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
{
struct nvc0_screen *screen = nvc0_screen(pscreen);
struct nvc0_context *nvc0;

View file

@ -56,10 +56,10 @@ struct nvc0_query {
#define NVC0_QUERY_ALLOC_SPACE 256
static boolean nvc0_mp_pm_query_begin(struct nvc0_context *,
static boolean nvc0_hw_sm_query_begin(struct nvc0_context *,
struct nvc0_query *);
static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
static void nvc0_hw_sm_query_end(struct nvc0_context *, struct nvc0_query *);
static boolean nvc0_hw_sm_query_result(struct nvc0_context *,
struct nvc0_query *, void *, boolean);
static inline struct nvc0_query *
@ -159,7 +159,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
} else
#endif
if (nvc0->screen->base.device->drm_version >= 0x01000101) {
if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
if (type >= NVE4_HW_SM_QUERY(0) && type <= NVE4_HW_SM_QUERY_LAST) {
/* for each MP:
* [00] = WS0.C0
* [04] = WS0.C1
@ -189,7 +189,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
break;
} else
if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
if (type >= NVC0_HW_SM_QUERY(0) && type <= NVC0_HW_SM_QUERY_LAST) {
/* for each MP:
* [00] = MP.C0
* [04] = MP.C1
@ -327,9 +327,9 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
q->u.value = 0;
} else
#endif
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
ret = nvc0_mp_pm_query_begin(nvc0, q);
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
ret = nvc0_hw_sm_query_begin(nvc0, q);
}
break;
}
@ -412,9 +412,9 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
return;
} else
#endif
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
nvc0_mp_pm_query_end(nvc0, q);
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
nvc0_hw_sm_query_end(nvc0, q);
}
break;
}
@ -453,9 +453,9 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
return true;
} else
#endif
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
return nvc0_mp_pm_query_result(nvc0, q, result, wait);
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
return nvc0_hw_sm_query_result(nvc0, q, result, wait);
}
if (q->state != NVC0_QUERY_STATE_READY)
@ -692,7 +692,7 @@ static const char *nvc0_drv_stat_names[] =
* We could add a kernel interface for it, but reading the counters like this
* has the advantage of being async (if get_result isn't called immediately).
*/
static const uint64_t nve4_read_mp_pm_counters_code[] =
static const uint64_t nve4_read_hw_sm_counters_code[] =
{
/* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
* mov b32 $r8 $tidx
@ -776,6 +776,33 @@ static const uint64_t nve4_read_mp_pm_counters_code[] =
static const char *nve4_pm_query_names[] =
{
/* MP counters */
"active_cycles",
"active_warps",
"atom_count",
"branch",
"divergent_branch",
"gld_request",
"global_ld_mem_divergence_replays",
"global_store_transaction",
"global_st_mem_divergence_replays",
"gred_count",
"gst_request",
"inst_executed",
"inst_issued",
"inst_issued1",
"inst_issued2",
"l1_global_load_hit",
"l1_global_load_miss",
"l1_local_load_hit",
"l1_local_load_miss",
"l1_local_store_hit",
"l1_local_store_miss",
"l1_shared_load_transactions",
"l1_shared_store_transactions",
"local_load",
"local_load_transactions",
"local_store",
"local_store_transactions",
"prof_trigger_00",
"prof_trigger_01",
"prof_trigger_02",
@ -784,41 +811,14 @@ static const char *nve4_pm_query_names[] =
"prof_trigger_05",
"prof_trigger_06",
"prof_trigger_07",
"warps_launched",
"threads_launched",
"sm_cta_launched",
"inst_issued1",
"inst_issued2",
"inst_executed",
"local_load",
"local_store",
"shared_load",
"shared_store",
"l1_local_load_hit",
"l1_local_load_miss",
"l1_local_store_hit",
"l1_local_store_miss",
"gld_request",
"gst_request",
"l1_global_load_hit",
"l1_global_load_miss",
"uncached_global_load_transaction",
"global_store_transaction",
"branch",
"divergent_branch",
"active_warps",
"active_cycles",
"inst_issued",
"atom_count",
"gred_count",
"shared_load_replay",
"shared_store",
"shared_store_replay",
"local_load_transactions",
"local_store_transactions",
"l1_shared_load_transactions",
"l1_shared_store_transactions",
"global_ld_mem_divergence_replays",
"global_st_mem_divergence_replays",
"sm_cta_launched",
"threads_launched",
"uncached_global_load_transaction",
"warps_launched",
/* metrics, i.e. functions of the MP counters */
"metric-ipc", /* inst_executed, clock */
"metric-ipac", /* inst_executed, active_cycles */
@ -852,7 +852,7 @@ struct nvc0_mp_counter_cfg
#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
struct nvc0_mp_pm_query_cfg
struct nvc0_hw_sm_query_cfg
{
struct nvc0_mp_counter_cfg ctr[4];
uint8_t num_counters;
@ -860,17 +860,17 @@ struct nvc0_mp_pm_query_cfg
uint8_t norm[2]; /* normalization num,denom */
};
#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
@ -881,8 +881,35 @@ struct nvc0_mp_pm_query_cfg
* metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
* this is inaccurate !
*/
static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] =
{
_Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
_Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
_Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
_Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
_Q1A(DIVERGENT_BRANCH, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
_Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
_Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
_Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
_Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
_Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
_Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
_Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
_Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
_Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
_Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
_Q1B(L1_GLD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
_Q1B(L1_GLD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
_Q1B(L1_LOCAL_LD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
_Q1B(L1_LOCAL_LD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
_Q1B(L1_LOCAL_ST_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
_Q1B(L1_LOCAL_ST_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
_Q1B(L1_SHARED_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
_Q1B(L1_SHARED_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
_Q1A(LOCAL_LD, 0x0001, B6, LDST, 0x00000008, 1, 1),
_Q1B(LOCAL_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
_Q1A(LOCAL_ST, 0x0001, B6, LDST, 0x0000000c, 1, 1),
_Q1B(LOCAL_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
_Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
_Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
_Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
@ -891,41 +918,14 @@ static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
_Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
_Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
_Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
_Q1A(LAUNCHED_WARPS, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
_Q1A(LAUNCHED_THREADS, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
_Q1B(LAUNCHED_CTA, 0x0001, B6, WARP, 0x0000001c, 1, 1),
_Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
_Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
_Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
_Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
_Q1A(LD_SHARED, 0x0001, B6, LDST, 0x00000000, 1, 1),
_Q1A(ST_SHARED, 0x0001, B6, LDST, 0x00000004, 1, 1),
_Q1A(LD_LOCAL, 0x0001, B6, LDST, 0x00000008, 1, 1),
_Q1A(ST_LOCAL, 0x0001, B6, LDST, 0x0000000c, 1, 1),
_Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
_Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
_Q1B(L1_LOCAL_LOAD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
_Q1B(L1_LOCAL_LOAD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
_Q1B(L1_LOCAL_STORE_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
_Q1B(L1_LOCAL_STORE_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
_Q1B(L1_GLOBAL_LOAD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
_Q1B(L1_GLOBAL_LOAD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
_Q1B(GLD_TRANSACTIONS_UNCACHED, 0x0001, B6, MEM, 0x00000000, 1, 1),
_Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
_Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
_Q1A(BRANCH_DIVERGENT, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
_Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
_Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
_Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
_Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
_Q1B(LD_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
_Q1B(ST_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
_Q1B(LD_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
_Q1B(ST_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
_Q1B(L1_LD_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
_Q1B(L1_ST_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
_Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
_Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
_Q1A(SHARED_LD, 0x0001, B6, LDST, 0x00000000, 1, 1),
_Q1B(SHARED_LD_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
_Q1A(SHARED_ST, 0x0001, B6, LDST, 0x00000004, 1, 1),
_Q1B(SHARED_ST_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
_Q1B(SM_CTA_LAUNCHED, 0x0001, B6, WARP, 0x0000001c, 1, 1),
_Q1A(THREADS_LAUNCHED, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
_Q1B(UNCACHED_GLD_TRANSACTIONS, 0x0001, B6, MEM, 0x00000000, 1, 1),
_Q1A(WARPS_LAUNCHED, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
_M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
_M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
_M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
@ -940,7 +940,7 @@ static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
#undef _M2B
/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
static const uint64_t nvc0_read_mp_pm_counters_code[] =
static const uint64_t nvc0_read_hw_sm_counters_code[] =
{
/* mov b32 $r8 $tidx
* mov b32 $r9 $physid
@ -993,29 +993,21 @@ static const uint64_t nvc0_read_mp_pm_counters_code[] =
static const char *nvc0_pm_query_names[] =
{
/* MP counters */
"inst_executed",
"active_cycles",
"active_warps",
"atom_count",
"branch",
"divergent_branch",
"active_warps",
"active_cycles",
"warps_launched",
"threads_launched",
"shared_load",
"shared_store",
"local_load",
"local_store",
"gred_count",
"atom_count",
"gld_request",
"gred_count",
"gst_request",
"inst_executed",
"inst_issued1_0",
"inst_issued1_1",
"inst_issued2_0",
"inst_issued2_1",
"thread_inst_executed_0",
"thread_inst_executed_1",
"thread_inst_executed_2",
"thread_inst_executed_3",
"local_load",
"local_store",
"prof_trigger_00",
"prof_trigger_01",
"prof_trigger_02",
@ -1024,35 +1016,35 @@ static const char *nvc0_pm_query_names[] =
"prof_trigger_05",
"prof_trigger_06",
"prof_trigger_07",
"shared_load",
"shared_store",
"threads_launched",
"thread_inst_executed_0",
"thread_inst_executed_1",
"thread_inst_executed_2",
"thread_inst_executed_3",
"warps_launched",
};
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
{
_Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
_Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
_Q(BRANCH_DIVERGENT, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
_Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
_Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(LAUNCHED_WARPS, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(LAUNCHED_THREADS, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
_Q(LD_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(ST_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(LD_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(ST_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
_Q(ATOM_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
_Q(DIVERGENT_BRANCH, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
_Q(GLD_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(GST_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
_Q(INST_ISSUED1_0, 0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(INST_ISSUED1_1, 0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(INST_ISSUED2_0, 0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(INST_ISSUED2_1, 0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
_Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
_Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
_Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
_Q(LOCAL_LD, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(LOCAL_ST, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(PROF_TRIGGER_0, 0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(PROF_TRIGGER_1, 0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(PROF_TRIGGER_2, 0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
@ -1061,38 +1053,46 @@ static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
_Q(PROF_TRIGGER_5, 0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(PROF_TRIGGER_6, 0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(PROF_TRIGGER_7, 0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(SHARED_LD, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(SHARED_ST, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(THREADS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
_Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
_Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
_Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
_Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
_Q(WARPS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
};
#undef _Q
static const struct nvc0_mp_pm_query_cfg *
nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
static const struct nvc0_hw_sm_query_cfg *
nvc0_hw_sm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_screen *screen = nvc0->screen;
if (screen->base.class_3d >= NVE4_3D_CLASS)
return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
return &nve4_hw_sm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
return &nvc0_hw_sm_queries[q->type - NVC0_HW_SM_QUERY(0)];
}
boolean
nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
nvc0_hw_sm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
const struct nvc0_mp_pm_query_cfg *cfg;
const struct nvc0_hw_sm_query_cfg *cfg;
unsigned i, c;
unsigned num_ab[2] = { 0, 0 };
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
/* check if we have enough free counter slots */
for (i = 0; i < cfg->num_counters; ++i)
num_ab[cfg->ctr[i].sig_dom]++;
if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 ||
screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) {
NOUVEAU_ERR("Not enough free MP counter slots !\n");
return false;
}
@ -1113,14 +1113,14 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
for (i = 0; i < cfg->num_counters; ++i) {
const unsigned d = cfg->ctr[i].sig_dom;
if (!screen->pm.num_mp_pm_active[d]) {
if (!screen->pm.num_hw_sm_active[d]) {
uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
if (screen->pm.num_mp_pm_active[!d])
if (screen->pm.num_hw_sm_active[!d])
m |= 1 << (7 + (8 * d));
BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
PUSH_DATA (push, m);
}
screen->pm.num_mp_pm_active[d]++;
screen->pm.num_hw_sm_active[d]++;
for (c = d * 4; c < (d * 4 + 4); ++c) {
if (!screen->pm.mp_counter[c]) {
@ -1163,7 +1163,7 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
}
static void
nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
nvc0_hw_sm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_screen *screen = nvc0->screen;
struct pipe_context *pipe = &nvc0->base.pipe;
@ -1174,9 +1174,9 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
const uint grid[3] = { screen->mp_count, 1, 1 };
unsigned c;
const struct nvc0_mp_pm_query_cfg *cfg;
const struct nvc0_hw_sm_query_cfg *cfg;
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
if (unlikely(!screen->pm.prog)) {
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
@ -1185,11 +1185,11 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
prog->num_gprs = 14;
prog->parm_size = 12;
if (is_nve4) {
prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
} else {
prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
}
screen->pm.prog = prog;
}
@ -1207,7 +1207,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
/* release counters for this query */
for (c = 0; c < 8; ++c) {
if (nvc0_query(screen->pm.mp_counter[c]) == q) {
screen->pm.num_mp_pm_active[c / 4]--;
screen->pm.num_hw_sm_active[c / 4]--;
screen->pm.mp_counter[c] = NULL;
}
}
@ -1234,7 +1234,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
q = nvc0_query(screen->pm.mp_counter[c]);
if (!q)
continue;
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
for (i = 0; i < cfg->num_counters; ++i) {
if (mask & (1 << q->ctr[i]))
break;
@ -1250,10 +1250,10 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
}
static inline bool
nvc0_mp_pm_query_read_data(uint32_t count[32][4],
nvc0_hw_sm_query_read_data(uint32_t count[32][4],
struct nvc0_context *nvc0, bool wait,
struct nvc0_query *q,
const struct nvc0_mp_pm_query_cfg *cfg,
const struct nvc0_hw_sm_query_cfg *cfg,
unsigned mp_count)
{
unsigned p, c;
@ -1275,10 +1275,10 @@ nvc0_mp_pm_query_read_data(uint32_t count[32][4],
}
static inline bool
nve4_mp_pm_query_read_data(uint32_t count[32][4],
nve4_hw_sm_query_read_data(uint32_t count[32][4],
struct nvc0_context *nvc0, bool wait,
struct nvc0_query *q,
const struct nvc0_mp_pm_query_cfg *cfg,
const struct nvc0_hw_sm_query_cfg *cfg,
unsigned mp_count)
{
unsigned p, c, d;
@ -1317,22 +1317,22 @@ nve4_mp_pm_query_read_data(uint32_t count[32][4],
* NOTE: Interpretation of IPC requires knowledge of MP count.
*/
static boolean
nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
nvc0_hw_sm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
void *result, boolean wait)
{
uint32_t count[32][4];
uint64_t value = 0;
unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
unsigned p, c;
const struct nvc0_mp_pm_query_cfg *cfg;
const struct nvc0_hw_sm_query_cfg *cfg;
bool ret;
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
ret = nve4_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
else
ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
ret = nvc0_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
if (!ret)
return false;
@ -1410,11 +1410,11 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
if (screen->base.device->drm_version >= 0x01000101) {
if (screen->compute) {
if (screen->base.class_3d == NVE4_3D_CLASS) {
count += NVE4_PM_QUERY_COUNT;
count += NVE4_HW_SM_QUERY_COUNT;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
/* NVC0_COMPUTE is not always enabled */
count += NVC0_PM_QUERY_COUNT;
count += NVC0_HW_SM_QUERY_COUNT;
}
}
}
@ -1444,15 +1444,15 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
if (screen->compute) {
if (screen->base.class_3d == NVE4_3D_CLASS) {
info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
info->query_type = NVE4_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
info->max_value.u64 =
(id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
(id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
return 1;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
info->query_type = NVC0_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
return 1;
}
@ -1494,7 +1494,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
if (screen->base.class_3d == NVE4_3D_CLASS) {
info->num_queries = NVE4_PM_QUERY_COUNT;
info->num_queries = NVE4_HW_SM_QUERY_COUNT;
/* On NVE4+, each multiprocessor have 8 hardware counters separated
* in two distinct domains, but we allow only one active query
@ -1504,7 +1504,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
return 1;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
info->num_queries = NVC0_PM_QUERY_COUNT;
info->num_queries = NVC0_HW_SM_QUERY_COUNT;
/* On NVC0:NVE4, each multiprocessor have 8 hardware counters
* in a single domain. */

View file

@ -95,7 +95,7 @@ struct nvc0_screen {
struct {
struct nvc0_program *prog; /* compute state object to read MP counters */
struct pipe_query *mp_counter[8]; /* counter to query allocation */
uint8_t num_mp_pm_active[2];
uint8_t num_hw_sm_active[2];
bool mp_counters_enabled;
} pm;
@ -120,156 +120,139 @@ nvc0_screen(struct pipe_screen *screen)
/* Performance counter queries:
*/
#define NVE4_PM_QUERY_COUNT 49
#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
#define NVE4_PM_QUERY_PROF_TRIGGER_0 0
#define NVE4_PM_QUERY_PROF_TRIGGER_1 1
#define NVE4_PM_QUERY_PROF_TRIGGER_2 2
#define NVE4_PM_QUERY_PROF_TRIGGER_3 3
#define NVE4_PM_QUERY_PROF_TRIGGER_4 4
#define NVE4_PM_QUERY_PROF_TRIGGER_5 5
#define NVE4_PM_QUERY_PROF_TRIGGER_6 6
#define NVE4_PM_QUERY_PROF_TRIGGER_7 7
#define NVE4_PM_QUERY_LAUNCHED_WARPS 8
#define NVE4_PM_QUERY_LAUNCHED_THREADS 9
#define NVE4_PM_QUERY_LAUNCHED_CTA 10
#define NVE4_PM_QUERY_INST_ISSUED1 11
#define NVE4_PM_QUERY_INST_ISSUED2 12
#define NVE4_PM_QUERY_INST_EXECUTED 13
#define NVE4_PM_QUERY_LD_LOCAL 14
#define NVE4_PM_QUERY_ST_LOCAL 15
#define NVE4_PM_QUERY_LD_SHARED 16
#define NVE4_PM_QUERY_ST_SHARED 17
#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT 18
#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS 19
#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT 20
#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS 21
#define NVE4_PM_QUERY_GLD_REQUEST 22
#define NVE4_PM_QUERY_GST_REQUEST 23
#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT 24
#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS 25
#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26
#define NVE4_PM_QUERY_GST_TRANSACTIONS 27
#define NVE4_PM_QUERY_BRANCH 28
#define NVE4_PM_QUERY_BRANCH_DIVERGENT 29
#define NVE4_PM_QUERY_ACTIVE_WARPS 30
#define NVE4_PM_QUERY_ACTIVE_CYCLES 31
#define NVE4_PM_QUERY_INST_ISSUED 32
#define NVE4_PM_QUERY_ATOM_COUNT 33
#define NVE4_PM_QUERY_GRED_COUNT 34
#define NVE4_PM_QUERY_LD_SHARED_REPLAY 35
#define NVE4_PM_QUERY_ST_SHARED_REPLAY 36
#define NVE4_PM_QUERY_LD_LOCAL_TRANSACTIONS 37
#define NVE4_PM_QUERY_ST_LOCAL_TRANSACTIONS 38
#define NVE4_PM_QUERY_L1_LD_SHARED_TRANSACTIONS 39
#define NVE4_PM_QUERY_L1_ST_SHARED_TRANSACTIONS 40
#define NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY 41
#define NVE4_PM_QUERY_GST_MEM_DIV_REPLAY 42
#define NVE4_PM_QUERY_METRIC_IPC 43
#define NVE4_PM_QUERY_METRIC_IPAC 44
#define NVE4_PM_QUERY_METRIC_IPEC 45
#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY 46
#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY 47
#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD 48
#define NVE4_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
#define NVE4_HW_SM_QUERY_LAST NVE4_HW_SM_QUERY(NVE4_HW_SM_QUERY_COUNT - 1)
enum nve4_pm_queries
{
NVE4_HW_SM_QUERY_ACTIVE_CYCLES = 0,
NVE4_HW_SM_QUERY_ACTIVE_WARPS,
NVE4_HW_SM_QUERY_ATOM_COUNT,
NVE4_HW_SM_QUERY_BRANCH,
NVE4_HW_SM_QUERY_DIVERGENT_BRANCH,
NVE4_HW_SM_QUERY_GLD_REQUEST,
NVE4_HW_SM_QUERY_GLD_MEM_DIV_REPLAY,
NVE4_HW_SM_QUERY_GST_TRANSACTIONS,
NVE4_HW_SM_QUERY_GST_MEM_DIV_REPLAY,
NVE4_HW_SM_QUERY_GRED_COUNT,
NVE4_HW_SM_QUERY_GST_REQUEST,
NVE4_HW_SM_QUERY_INST_EXECUTED,
NVE4_HW_SM_QUERY_INST_ISSUED,
NVE4_HW_SM_QUERY_INST_ISSUED1,
NVE4_HW_SM_QUERY_INST_ISSUED2,
NVE4_HW_SM_QUERY_L1_GLD_HIT,
NVE4_HW_SM_QUERY_L1_GLD_MISS,
NVE4_HW_SM_QUERY_L1_LOCAL_LD_HIT,
NVE4_HW_SM_QUERY_L1_LOCAL_LD_MISS,
NVE4_HW_SM_QUERY_L1_LOCAL_ST_HIT,
NVE4_HW_SM_QUERY_L1_LOCAL_ST_MISS,
NVE4_HW_SM_QUERY_L1_SHARED_LD_TRANSACTIONS,
NVE4_HW_SM_QUERY_L1_SHARED_ST_TRANSACTIONS,
NVE4_HW_SM_QUERY_LOCAL_LD,
NVE4_HW_SM_QUERY_LOCAL_LD_TRANSACTIONS,
NVE4_HW_SM_QUERY_LOCAL_ST,
NVE4_HW_SM_QUERY_LOCAL_ST_TRANSACTIONS,
NVE4_HW_SM_QUERY_PROF_TRIGGER_0,
NVE4_HW_SM_QUERY_PROF_TRIGGER_1,
NVE4_HW_SM_QUERY_PROF_TRIGGER_2,
NVE4_HW_SM_QUERY_PROF_TRIGGER_3,
NVE4_HW_SM_QUERY_PROF_TRIGGER_4,
NVE4_HW_SM_QUERY_PROF_TRIGGER_5,
NVE4_HW_SM_QUERY_PROF_TRIGGER_6,
NVE4_HW_SM_QUERY_PROF_TRIGGER_7,
NVE4_HW_SM_QUERY_SHARED_LD,
NVE4_HW_SM_QUERY_SHARED_LD_REPLAY,
NVE4_HW_SM_QUERY_SHARED_ST,
NVE4_HW_SM_QUERY_SHARED_ST_REPLAY,
NVE4_HW_SM_QUERY_SM_CTA_LAUNCHED,
NVE4_HW_SM_QUERY_THREADS_LAUNCHED,
NVE4_HW_SM_QUERY_UNCACHED_GLD_TRANSACTIONS,
NVE4_HW_SM_QUERY_WARPS_LAUNCHED,
NVE4_HW_SM_QUERY_METRIC_IPC,
NVE4_HW_SM_QUERY_METRIC_IPAC,
NVE4_HW_SM_QUERY_METRIC_IPEC,
NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY,
NVE4_HW_SM_QUERY_METRIC_MP_EFFICIENCY,
NVE4_HW_SM_QUERY_METRIC_INST_REPLAY_OHEAD,
NVE4_HW_SM_QUERY_COUNT
};
/*
#define NVE4_PM_QUERY_GR_IDLE 50
#define NVE4_PM_QUERY_BSP_IDLE 51
#define NVE4_PM_QUERY_VP_IDLE 52
#define NVE4_PM_QUERY_PPP_IDLE 53
#define NVE4_PM_QUERY_CE0_IDLE 54
#define NVE4_PM_QUERY_CE1_IDLE 55
#define NVE4_PM_QUERY_CE2_IDLE 56
*/
/* L2 queries (PCOUNTER) */
/*
#define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57
...
*/
/* TEX queries (PCOUNTER) */
/*
#define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58
...
*/
#define NVC0_PM_QUERY_COUNT 31
#define NVC0_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
#define NVC0_PM_QUERY_LAST NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1)
#define NVC0_PM_QUERY_INST_EXECUTED 0
#define NVC0_PM_QUERY_BRANCH 1
#define NVC0_PM_QUERY_BRANCH_DIVERGENT 2
#define NVC0_PM_QUERY_ACTIVE_WARPS 3
#define NVC0_PM_QUERY_ACTIVE_CYCLES 4
#define NVC0_PM_QUERY_LAUNCHED_WARPS 5
#define NVC0_PM_QUERY_LAUNCHED_THREADS 6
#define NVC0_PM_QUERY_LD_SHARED 7
#define NVC0_PM_QUERY_ST_SHARED 8
#define NVC0_PM_QUERY_LD_LOCAL 9
#define NVC0_PM_QUERY_ST_LOCAL 10
#define NVC0_PM_QUERY_GRED_COUNT 11
#define NVC0_PM_QUERY_ATOM_COUNT 12
#define NVC0_PM_QUERY_GLD_REQUEST 13
#define NVC0_PM_QUERY_GST_REQUEST 14
#define NVC0_PM_QUERY_INST_ISSUED1_0 15
#define NVC0_PM_QUERY_INST_ISSUED1_1 16
#define NVC0_PM_QUERY_INST_ISSUED2_0 17
#define NVC0_PM_QUERY_INST_ISSUED2_1 18
#define NVC0_PM_QUERY_TH_INST_EXECUTED_0 19
#define NVC0_PM_QUERY_TH_INST_EXECUTED_1 20
#define NVC0_PM_QUERY_TH_INST_EXECUTED_2 21
#define NVC0_PM_QUERY_TH_INST_EXECUTED_3 22
#define NVC0_PM_QUERY_PROF_TRIGGER_0 23
#define NVC0_PM_QUERY_PROF_TRIGGER_1 24
#define NVC0_PM_QUERY_PROF_TRIGGER_2 25
#define NVC0_PM_QUERY_PROF_TRIGGER_3 26
#define NVC0_PM_QUERY_PROF_TRIGGER_4 27
#define NVC0_PM_QUERY_PROF_TRIGGER_5 28
#define NVC0_PM_QUERY_PROF_TRIGGER_6 29
#define NVC0_PM_QUERY_PROF_TRIGGER_7 30
#define NVC0_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
#define NVC0_HW_SM_QUERY_LAST NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_COUNT - 1)
enum nvc0_pm_queries
{
NVC0_HW_SM_QUERY_ACTIVE_CYCLES = 0,
NVC0_HW_SM_QUERY_ACTIVE_WARPS,
NVC0_HW_SM_QUERY_ATOM_COUNT,
NVC0_HW_SM_QUERY_BRANCH,
NVC0_HW_SM_QUERY_DIVERGENT_BRANCH,
NVC0_HW_SM_QUERY_GLD_REQUEST,
NVC0_HW_SM_QUERY_GRED_COUNT,
NVC0_HW_SM_QUERY_GST_REQUEST,
NVC0_HW_SM_QUERY_INST_EXECUTED,
NVC0_HW_SM_QUERY_INST_ISSUED1_0,
NVC0_HW_SM_QUERY_INST_ISSUED1_1,
NVC0_HW_SM_QUERY_INST_ISSUED2_0,
NVC0_HW_SM_QUERY_INST_ISSUED2_1,
NVC0_HW_SM_QUERY_LOCAL_LD,
NVC0_HW_SM_QUERY_LOCAL_ST,
NVC0_HW_SM_QUERY_PROF_TRIGGER_0,
NVC0_HW_SM_QUERY_PROF_TRIGGER_1,
NVC0_HW_SM_QUERY_PROF_TRIGGER_2,
NVC0_HW_SM_QUERY_PROF_TRIGGER_3,
NVC0_HW_SM_QUERY_PROF_TRIGGER_4,
NVC0_HW_SM_QUERY_PROF_TRIGGER_5,
NVC0_HW_SM_QUERY_PROF_TRIGGER_6,
NVC0_HW_SM_QUERY_PROF_TRIGGER_7,
NVC0_HW_SM_QUERY_SHARED_LD,
NVC0_HW_SM_QUERY_SHARED_ST,
NVC0_HW_SM_QUERY_THREADS_LAUNCHED,
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_0,
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_1,
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_2,
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_3,
NVC0_HW_SM_QUERY_WARPS_LAUNCHED,
NVC0_HW_SM_QUERY_COUNT
};
/* Driver statistics queries:
*/
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
#define NVC0_QUERY_DRV_STAT(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
#define NVC0_QUERY_DRV_STAT_COUNT 29
#define NVC0_QUERY_DRV_STAT_LAST NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)
#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT 0
#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES 1
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT 2
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID 3
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS 4
#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ 5
#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE 6
#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT 7
#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT 8
#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT 9
#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ 10
#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE 11
#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID 12
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT 13
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID 14
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS 15
#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES 16
#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17
#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18
#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT 19
#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT 20
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY 21
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED 22
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT 23
#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES 24
#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT 25
#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES 26
#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT 27
#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT 28
#else
#define NVC0_QUERY_DRV_STAT_COUNT 0
enum nvc0_drv_stats_queries
{
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT = 0,
NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES,
NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT,
NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID,
NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS,
NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ,
NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE,
NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT,
NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT,
NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT,
NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ,
NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE,
NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID,
NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT,
NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID,
NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS,
NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES,
NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT,
NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT,
NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT,
NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT,
NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY,
NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED,
NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT,
NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES,
NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT,
NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES,
NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT,
NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT,
#endif
NVC0_QUERY_DRV_STAT_COUNT
};
int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
struct pipe_driver_query_info *);

View file

@ -120,7 +120,7 @@ int64_t compute_memory_prealloc_chunk(
assert(size_in_dw <= pool->size_in_dw);
COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n",
size_in_dw);
LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
@ -151,7 +151,7 @@ struct list_head *compute_memory_postalloc_chunk(
struct compute_memory_item *next;
struct list_head *next_link;
COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunck() start_in_dw = %ld\n",
COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunck() start_in_dw = %"PRIi64"\n",
start_in_dw);
/* Check if we can insert it in the front of the list */
@ -568,7 +568,7 @@ void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
struct pipe_resource *res;
COMPUTE_DBG(pool->screen, "* compute_memory_free() id + %ld \n", id);
COMPUTE_DBG(pool->screen, "* compute_memory_free() id + %"PRIi64" \n", id);
LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) {
@ -628,7 +628,7 @@ struct compute_memory_item* compute_memory_alloc(
{
struct compute_memory_item *new_item = NULL;
COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
size_in_dw, 4 * size_in_dw);
new_item = (struct compute_memory_item *)

View file

@ -2143,11 +2143,11 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_
if (state->geom_enable) {
uint32_t cut_val;
if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 128)
if (rctx->gs_shader->gs_max_out_vertices <= 128)
cut_val = V_028A40_GS_CUT_128;
else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 256)
else if (rctx->gs_shader->gs_max_out_vertices <= 256)
cut_val = V_028A40_GS_CUT_256;
else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 512)
else if (rctx->gs_shader->gs_max_out_vertices <= 512)
cut_val = V_028A40_GS_CUT_512;
else
cut_val = V_028A40_GS_CUT_1024;
@ -3013,7 +3013,7 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
struct r600_shader *rshader = &shader->shader;
struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
unsigned gsvs_itemsize =
(cp_shader->ring_item_size * rshader->gs_max_out_vertices) >> 2;
(cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
r600_init_command_buffer(cb, 64);
@ -3022,14 +3022,14 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
r600_store_context_reg(cb, R_028AB8_VGT_VTX_CNT_EN, 1);
r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT,
S_028B38_MAX_VERT_OUT(rshader->gs_max_out_vertices));
S_028B38_MAX_VERT_OUT(shader->selector->gs_max_out_vertices));
r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE,
r600_conv_prim_to_gs_out(rshader->gs_output_prim));
r600_conv_prim_to_gs_out(shader->selector->gs_output_prim));
if (rctx->screen->b.info.drm_minor >= 35) {
r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT,
S_028B90_CNT(MIN2(rshader->gs_num_invocations, 127)) |
S_028B90_ENABLE(rshader->gs_num_invocations > 0));
S_028B90_CNT(MIN2(shader->selector->gs_num_invocations, 127)) |
S_028B90_ENABLE(shader->selector->gs_num_invocations > 0));
}
r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
r600_store_value(cb, cp_shader->ring_item_size >> 2);

View file

@ -2029,6 +2029,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
fprintf(stderr, "CND:%X ", cf->cond);
if (cf->pop_count)
fprintf(stderr, "POP:%X ", cf->pop_count);
if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
}
}

View file

@ -36,6 +36,8 @@
#include "util/list.h"
#include "util/u_transfer.h"
#include "tgsi/tgsi_scan.h"
#define R600_NUM_ATOMS 75
#define R600_MAX_VIEWPORTS 16
@ -305,12 +307,18 @@ struct r600_pipe_shader_selector {
struct tgsi_token *tokens;
struct pipe_stream_output_info so;
struct tgsi_shader_info info;
unsigned num_shaders;
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
unsigned type;
/* geometry shader properties */
unsigned gs_output_prim;
unsigned gs_max_out_vertices;
unsigned gs_num_invocations;
unsigned nr_ps_max_color_exports;
};
@ -936,28 +944,5 @@ static inline bool r600_can_read_depth(struct r600_texture *rtex)
#define V_028A6C_OUTPRIM_TYPE_LINESTRIP 1
#define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2
static inline unsigned r600_conv_prim_to_gs_out(unsigned mode)
{
static const int prim_conv[] = {
V_028A6C_OUTPRIM_TYPE_POINTLIST,
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
V_028A6C_OUTPRIM_TYPE_TRISTRIP
};
assert(mode < Elements(prim_conv));
return prim_conv[mode];
}
unsigned r600_conv_prim_to_gs_out(unsigned mode);
#endif

View file

@ -1809,7 +1809,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
struct tgsi_token *tokens = pipeshader->selector->tokens;
struct pipe_stream_output_info so = pipeshader->selector->so;
struct tgsi_full_immediate *immediate;
struct tgsi_full_property *property;
struct r600_shader_ctx ctx;
struct r600_bytecode_output output[32];
unsigned output_done, noutput;
@ -1840,7 +1839,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
shader->indirect_files = ctx.info.indirect_files;
indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
tgsi_parse_init(&ctx.parse, tokens);
ctx.type = ctx.parse.FullHeader.Processor.Processor;
ctx.type = ctx.info.processor;
shader->processor_type = ctx.type;
ctx.bc->type = shader->processor_type;
@ -1968,6 +1967,12 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.nliterals = 0;
ctx.literals = NULL;
shader->fs_write_all = FALSE;
if (ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
shader->fs_write_all = TRUE;
shader->vs_position_window_space = FALSE;
if (ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION])
shader->vs_position_window_space = TRUE;
if (shader->vs_as_gs_a)
vs_add_primid_output(&ctx, key.vs.prim_id_out);
@ -1994,34 +1999,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
goto out_err;
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
break;
case TGSI_TOKEN_TYPE_PROPERTY:
property = &ctx.parse.FullToken.FullProperty;
switch (property->Property.PropertyName) {
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
if (property->u[0].Data == 1)
shader->fs_write_all = TRUE;
break;
case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
if (property->u[0].Data == 1)
shader->vs_position_window_space = TRUE;
break;
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
/* we don't need this one */
break;
case TGSI_PROPERTY_GS_INPUT_PRIM:
shader->gs_input_prim = property->u[0].Data;
break;
case TGSI_PROPERTY_GS_OUTPUT_PRIM:
shader->gs_output_prim = property->u[0].Data;
break;
case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
shader->gs_max_out_vertices = property->u[0].Data;
break;
case TGSI_PROPERTY_GS_INVOCATIONS:
shader->gs_num_invocations = property->u[0].Data;
break;
}
break;
default:
R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);

View file

@ -78,11 +78,6 @@ struct r600_shader {
/* Temporarily workaround SB not handling CF_INDEX_[01] index registers */
boolean uses_index_registers;
/* geometry shader properties */
unsigned gs_input_prim;
unsigned gs_output_prim;
unsigned gs_max_out_vertices;
unsigned gs_num_invocations;
/* size in bytes of a data item in the ring (single vertex data) */
unsigned ring_item_size;

View file

@ -1951,11 +1951,11 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom
if (state->geom_enable) {
uint32_t cut_val;
if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 128)
if (rctx->gs_shader->gs_max_out_vertices <= 128)
cut_val = V_028A40_GS_CUT_128;
else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 256)
else if (rctx->gs_shader->gs_max_out_vertices <= 256)
cut_val = V_028A40_GS_CUT_256;
else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 512)
else if (rctx->gs_shader->gs_max_out_vertices <= 512)
cut_val = V_028A40_GS_CUT_512;
else
cut_val = V_028A40_GS_CUT_1024;
@ -2650,7 +2650,7 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
struct r600_shader *rshader = &shader->shader;
struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
unsigned gsvs_itemsize =
(cp_shader->ring_item_size * rshader->gs_max_out_vertices) >> 2;
(cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
r600_init_command_buffer(cb, 64);
@ -2659,10 +2659,10 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
if (rctx->b.chip_class >= R700) {
r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT,
S_028B38_MAX_VERT_OUT(rshader->gs_max_out_vertices));
S_028B38_MAX_VERT_OUT(shader->selector->gs_max_out_vertices));
}
r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE,
r600_conv_prim_to_gs_out(rshader->gs_output_prim));
r600_conv_prim_to_gs_out(shader->selector->gs_output_prim));
r600_store_context_reg(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE,
cp_shader->ring_item_size >> 2);

View file

@ -34,6 +34,7 @@
#include "util/u_upload_mgr.h"
#include "util/u_math.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw)
{
@ -123,6 +124,31 @@ static unsigned r600_conv_pipe_prim(unsigned prim)
return prim_conv[prim];
}
unsigned r600_conv_prim_to_gs_out(unsigned mode)
{
static const int prim_conv[] = {
[PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
[PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
[R600_PRIM_RECTANGLE_LIST] = V_028A6C_OUTPRIM_TYPE_TRISTRIP
};
assert(mode < Elements(prim_conv));
return prim_conv[mode];
}
/* common state between evergreen and r600 */
static void r600_bind_blend_state_internal(struct r600_context *rctx,
@ -818,6 +844,19 @@ static void *r600_create_shader_state(struct pipe_context *ctx,
sel->type = pipe_shader_type;
sel->tokens = tgsi_dup_tokens(state->tokens);
sel->so = state->stream_output;
tgsi_scan_shader(state->tokens, &sel->info);
switch (pipe_shader_type) {
case PIPE_SHADER_GEOMETRY:
sel->gs_output_prim =
sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
sel->gs_max_out_vertices =
sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
sel->gs_num_invocations =
sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
break;
}
return sel;
}
@ -1524,7 +1563,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
unsigned prim = info.mode;
if (rctx->gs_shader) {
prim = rctx->gs_shader->current->shader.gs_output_prim;
prim = rctx->gs_shader->gs_output_prim;
}
prim = r600_conv_prim_to_gs_out(prim); /* decrease the number of types to 3 */

View file

@ -32,6 +32,7 @@ int bc_decoder::decode_cf(unsigned &i, bc_cf& bc) {
int r = 0;
uint32_t dw0 = dw[i];
uint32_t dw1 = dw[i+1];
assert(i+1 <= ndw);
if ((dw1 >> 29) & 1) { // CF_ALU
return decode_cf_alu(i, bc);

View file

@ -199,6 +199,9 @@ void bc_finalizer::finalize_if(region_node* r) {
cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
cf_node *if_pop = sh.create_cf(CF_OP_POP);
if (!last_cf || last_cf->get_parent_region() == r) {
last_cf = if_pop;
}
if_pop->bc.pop_count = 1;
if_pop->jump_after(if_pop);

View file

@ -95,7 +95,7 @@ int bc_parser::decode_shader() {
if ((r = decode_cf(i, eop)))
return r;
} while (!eop || (i >> 1) <= max_cf);
} while (!eop || (i >> 1) < max_cf);
return 0;
}
@ -769,6 +769,7 @@ int bc_parser::prepare_ir() {
}
int bc_parser::prepare_loop(cf_node* c) {
assert(c->bc.addr-1 < cf_map.size());
cf_node *end = cf_map[c->bc.addr - 1];
assert(end->bc.op == CF_OP_LOOP_END);
@ -788,8 +789,12 @@ int bc_parser::prepare_loop(cf_node* c) {
}
int bc_parser::prepare_if(cf_node* c) {
assert(c->bc.addr-1 < cf_map.size());
cf_node *c_else = NULL, *end = cf_map[c->bc.addr];
if (!end)
return 0; // not quite sure how this happens, malformed input?
BCP_DUMP(
sblog << "parsing JUMP @" << c->bc.id;
sblog << "\n";
@ -815,7 +820,7 @@ int bc_parser::prepare_if(cf_node* c) {
if (c_else->parent != c->parent)
c_else = NULL;
if (end->parent != c->parent)
if (end && end->parent != c->parent)
end = NULL;
region_node *reg = sh->create_region();

View file

@ -236,7 +236,7 @@ void rp_gpr_tracker::unreserve(alu_node* n) {
for (i = 0; i < nsrc; ++i) {
value *v = n->src[i];
if (v->is_readonly())
if (v->is_readonly() || v->is_undef())
continue;
if (i == 1 && opt)
continue;

View file

@ -197,7 +197,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
radeon_emit(cs, (va >> 32) & 0xFFFF);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
@ -206,13 +206,13 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
radeon_emit(cs, (va >> 32) & 0xFFFF);
break;
case PIPE_QUERY_TIME_ELAPSED:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
radeon_emit(cs, va);
radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
radeon_emit(cs, 0);
radeon_emit(cs, 0);
break;
@ -220,7 +220,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
radeon_emit(cs, (va >> 32) & 0xFFFF);
break;
default:
assert(0);
@ -254,7 +254,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
radeon_emit(cs, (va >> 32) & 0xFFFF);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
@ -264,7 +264,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
radeon_emit(cs, (va >> 32) & 0xFFFF);
break;
case PIPE_QUERY_TIME_ELAPSED:
va += query->buffer.results_end + query->result_size/2;
@ -273,7 +273,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
radeon_emit(cs, va);
radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
radeon_emit(cs, 0);
radeon_emit(cs, 0);
break;
@ -282,7 +282,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
radeon_emit(cs, (va >> 32) & 0xFFFF);
break;
default:
assert(0);
@ -341,8 +341,8 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
while (results_base < qbuf->results_end) {
radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
radeon_emit(cs, (va + results_base) & 0xFFFFFFFFUL);
radeon_emit(cs, op | (((va + results_base) >> 32UL) & 0xFF));
radeon_emit(cs, va + results_base);
radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
results_base += query->result_size;

View file

@ -362,7 +362,7 @@ static void si_launch_grid(
shader_va += pc;
#endif
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1,

View file

@ -426,7 +426,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
va = rbuffer->gpu_address + offset;
/* Fill in T# buffer resource description */
desc[0] = va & 0xFFFFFFFF;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(vb->stride);

View file

@ -86,8 +86,8 @@ static void si_dma_copy_buffer(struct si_context *ctx,
for (i = 0; i < ncopy; i++) {
csize = size < max_csize ? size : max_csize;
cs->buf[cs->cdw++] = SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd, csize);
cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
cs->buf[cs->cdw++] = src_offset & 0xffffffff;
cs->buf[cs->cdw++] = dst_offset;
cs->buf[cs->cdw++] = src_offset;
cs->buf[cs->cdw++] = (dst_offset >> 32UL) & 0xff;
cs->buf[cs->cdw++] = (src_offset >> 32UL) & 0xff;
dst_offset += csize << shift;

View file

@ -3781,7 +3781,7 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
uint64_t scratch_va)
{
unsigned i;
uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
uint32_t scratch_rsrc_dword0 = scratch_va;
uint32_t scratch_rsrc_dword1 =
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
| S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);

View file

@ -35,10 +35,10 @@
#include "util/u_pstipple.h"
static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
void (*emit)(struct si_context *ctx, struct r600_atom *state),
void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
unsigned num_dw)
{
atom->emit = (void*)emit;
atom->emit = (void*)emit_func;
atom->num_dw = num_dw;
atom->dirty = false;
*list_elem = atom;

View file

@ -409,7 +409,7 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
nir_cf_node_get_function(&block->cf_node);
nir_builder b;
nir_builder_init(&b, impl);
nir_builder_insert_before_instr(&b, &intr->instr);
b.cursor = nir_before_instr(&intr->instr);
vc4_nir_lower_blend_instr(c, &b, intr);
}
return true;

View file

@ -56,7 +56,7 @@ static void
vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
nir_builder_insert_before_instr(b, &intr->instr);
b->cursor = nir_before_instr(&intr->instr);
if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
VC4_NIR_TLB_COLOR_READ_INPUT) {
@ -160,7 +160,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
/* All TGSI-to-NIR outputs are VEC4. */
assert(intr->num_components == 4);
nir_builder_insert_before_instr(b, &intr->instr);
b->cursor = nir_before_instr(&intr->instr);
for (unsigned i = 0; i < intr->num_components; i++) {
nir_intrinsic_instr *intr_comp =
@ -189,7 +189,7 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
return;
assert(intr->num_components == 4);
nir_builder_insert_before_instr(b, &intr->instr);
b->cursor = nir_before_instr(&intr->instr);
/* Generate scalar loads equivalent to the original VEC4. */
nir_ssa_def *dests[4];

View file

@ -101,30 +101,54 @@ static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf)
return bo;
}
static bool radeon_bo_is_busy(struct radeon_bo *bo)
{
struct drm_radeon_gem_busy args = {0};
args.handle = bo->handle;
return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
&args, sizeof(args)) != 0;
}
static void radeon_bo_wait_idle(struct radeon_bo *bo)
{
struct drm_radeon_gem_wait_idle args = {0};
args.handle = bo->handle;
while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
&args, sizeof(args)) == -EBUSY);
}
static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
enum radeon_bo_usage usage)
{
struct radeon_bo *bo = get_radeon_bo(_buf);
struct radeon_bo *bo = get_radeon_bo(_buf);
int64_t abs_timeout;
/* Wait if any ioctl is being submitted with this buffer. */
if (!os_wait_until_zero(&bo->num_active_ioctls, timeout))
return false;
/* No timeout. Just query. */
if (timeout == 0)
return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);
/* TODO: handle arbitrary timeout */
if (!timeout) {
struct drm_radeon_gem_busy args = {0};
abs_timeout = os_time_get_absolute_timeout(timeout);
args.handle = bo->handle;
return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
&args, sizeof(args)) == 0;
} else {
struct drm_radeon_gem_wait_idle args = {0};
/* Wait if any ioctl is being submitted with this buffer. */
if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
return false;
args.handle = bo->handle;
while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
&args, sizeof(args)) == -EBUSY);
/* Infinite timeout. */
if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
radeon_bo_wait_idle(bo);
return true;
}
/* Other timeouts need to be emulated with a loop. */
while (radeon_bo_is_busy(bo)) {
if (os_time_get_nano() >= abs_timeout)
return false;
os_time_sleep(10);
}
return true;
}
static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)

View file

@ -645,29 +645,8 @@ static bool radeon_fence_wait(struct radeon_winsys *ws,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
struct pb_buffer *rfence = (struct pb_buffer*)fence;
if (timeout == 0)
return ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE);
if (timeout != PIPE_TIMEOUT_INFINITE) {
int64_t start_time = os_time_get();
/* Convert to microseconds. */
timeout /= 1000;
/* Wait in a loop. */
while (!ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE)) {
if (os_time_get() - start_time >= timeout) {
return FALSE;
}
os_time_sleep(10);
}
return TRUE;
}
ws->buffer_wait(rfence, PIPE_TIMEOUT_INFINITE, RADEON_USAGE_READWRITE);
return TRUE;
return ws->buffer_wait((struct pb_buffer*)fence, timeout,
RADEON_USAGE_READWRITE);
}
static void radeon_fence_reference(struct pipe_fence_handle **dst,

View file

@ -284,8 +284,9 @@ texture_multisample(const _mesa_glsl_parse_state *state)
static bool
texture_multisample_array(const _mesa_glsl_parse_state *state)
{
return state->is_version(150, 0) ||
state->ARB_texture_multisample_enable;
return state->is_version(150, 320) ||
state->ARB_texture_multisample_enable ||
state->OES_texture_storage_multisample_2d_array_enable;
}
static bool
@ -665,10 +666,7 @@ private:
B1(any);
B1(all);
B1(not);
B2(textureSize);
ir_function_signature *_textureSize(builtin_available_predicate avail,
const glsl_type *return_type,
const glsl_type *sampler_type);
BA2(textureSize);
/** Flags to _texture() */
#define TEX_PROJECT 1

View file

@ -307,7 +307,8 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
add_type(symbols, glsl_type::usamplerCubeArray_type);
}
if (state->ARB_texture_multisample_enable) {
if (state->ARB_texture_multisample_enable ||
state->OES_texture_storage_multisample_2d_array_enable) {
add_type(symbols, glsl_type::sampler2DMS_type);
add_type(symbols, glsl_type::isampler2DMS_type);
add_type(symbols, glsl_type::usampler2DMS_type);

View file

@ -2382,6 +2382,8 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
add_builtin_define(parser, "GL_OES_EGL_image_external", 1);
if (extensions->OES_standard_derivatives)
add_builtin_define(parser, "GL_OES_standard_derivatives", 1);
if (extensions->ARB_texture_multisample)
add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1);
}
} else {
add_builtin_define(parser, "GL_ARB_draw_buffers", 1);

View file

@ -347,9 +347,9 @@ usampler2DArray KEYWORD(130, 300, 130, 300, USAMPLER2DARRAY);
sampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, SAMPLER2DMS);
isampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMS);
usampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, USAMPLER2DMS);
sampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, SAMPLER2DMSARRAY);
isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMSARRAY);
usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, USAMPLER2DMSARRAY);
sampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, SAMPLER2DMSARRAY);
isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, ISAMPLER2DMSARRAY);
usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, USAMPLER2DMSARRAY);
/* keywords available with ARB_texture_cube_map_array_enable extension on desktop GLSL */
samplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY);

View file

@ -628,6 +628,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
EXT(OES_texture_3D, false, true, EXT_texture3D),
EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample),
/* All other extensions go here, sorted alphabetically.
*/

View file

@ -548,6 +548,8 @@ struct _mesa_glsl_parse_state {
bool OES_standard_derivatives_warn;
bool OES_texture_3D_enable;
bool OES_texture_3D_warn;
bool OES_texture_storage_multisample_2d_array_enable;
bool OES_texture_storage_multisample_2d_array_warn;
/* All other extensions go here, sorted alphabetically.
*/

View file

@ -620,7 +620,7 @@ struct glsl_type {
const glsl_type *field_type(const char *name) const;
/**
* Get the location of a filed within a record type
* Get the location of a field within a record type
*/
int field_index(const char *name) const;

View file

@ -566,6 +566,12 @@ csel(operand a, operand b, operand c)
return expr(ir_triop_csel, a, b, c);
}
ir_expression *
bitfield_extract(operand a, operand b, operand c)
{
return expr(ir_triop_bitfield_extract, a, b, c);
}
ir_expression *
bitfield_insert(operand a, operand b, operand c, operand d)
{

View file

@ -200,6 +200,7 @@ ir_expression *interpolate_at_sample(operand a, operand b);
ir_expression *fma(operand a, operand b, operand c);
ir_expression *lrp(operand x, operand y, operand a);
ir_expression *csel(operand a, operand b, operand c);
ir_expression *bitfield_extract(operand a, operand b, operand c);
ir_expression *bitfield_insert(operand a, operand b, operand c, operand d);
ir_swizzle *swizzle(operand a, int swizzle, int components);

View file

@ -66,7 +66,10 @@ enum lower_packing_builtins_op {
LOWER_UNPACK_SNORM_4x8 = 0x0200,
LOWER_PACK_UNORM_4x8 = 0x0400,
LOWER_UNPACK_UNORM_4x8 = 0x0800
LOWER_UNPACK_UNORM_4x8 = 0x0800,
LOWER_PACK_USE_BFI = 0x1000,
LOWER_PACK_USE_BFE = 0x2000,
};
bool do_common_optimization(exec_list *ir, bool linked,

View file

@ -47,10 +47,9 @@
static unsigned
values_for_type(const glsl_type *type)
{
if (type->is_sampler() || type->is_subroutine()) {
if (type->is_sampler()) {
return 1;
} else if (type->is_array() && (type->fields.array->is_sampler() ||
type->fields.array->is_subroutine())) {
} else if (type->is_array() && type->fields.array->is_sampler()) {
return type->array_size();
} else {
return type->component_slots();

View file

@ -118,6 +118,8 @@ public:
*rvalue = split_unpack_half_2x16(op0);
break;
case LOWER_PACK_UNPACK_NONE:
case LOWER_PACK_USE_BFI:
case LOWER_PACK_USE_BFE:
assert(!"not reached");
break;
}
@ -222,9 +224,16 @@ private:
/* uvec2 u = UVEC2_RVAL; */
ir_variable *u = factory.make_temp(glsl_type::uvec2_type,
"tmp_pack_uvec2_to_uint");
"tmp_pack_uvec2_to_uint");
factory.emit(assign(u, uvec2_rval));
if (op_mask & LOWER_PACK_USE_BFI) {
return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)),
swizzle_y(u),
constant(16),
constant(16));
}
/* return (u.y << 16) | (u.x & 0xffff); */
return bit_or(lshift(swizzle_y(u), constant(16u)),
bit_and(swizzle_x(u), constant(0xffffu)));
@ -242,9 +251,22 @@ private:
{
assert(uvec4_rval->type == glsl_type::uvec4_type);
/* uvec4 u = UVEC4_RVAL; */
ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
"tmp_pack_uvec4_to_uint");
"tmp_pack_uvec4_to_uint");
if (op_mask & LOWER_PACK_USE_BFI) {
/* uvec4 u = UVEC4_RVAL; */
factory.emit(assign(u, uvec4_rval));
return bitfield_insert(bitfield_insert(
bitfield_insert(
bit_and(swizzle_x(u), constant(0xffu)),
swizzle_y(u), constant(8), constant(8)),
swizzle_z(u), constant(16), constant(8)),
swizzle_w(u), constant(24), constant(8));
}
/* uvec4 u = UVEC4_RVAL & 0xff */
factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));
/* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */
@ -284,6 +306,39 @@ private:
return deref(u2).val;
}
/**
* \brief Unpack a uint32 into two int16's.
*
* Specifically each 16-bit value is sign-extended to the full width of an
* int32 on return.
*/
ir_rvalue *
unpack_uint_to_ivec2(ir_rvalue *uint_rval)
{
assert(uint_rval->type == glsl_type::uint_type);
if (!(op_mask & LOWER_PACK_USE_BFE)) {
return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)),
constant(16u)),
constant(16u));
}
ir_variable *i = factory.make_temp(glsl_type::int_type,
"tmp_unpack_uint_to_ivec2_i");
factory.emit(assign(i, u2i(uint_rval)));
/* ivec2 i2; */
ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type,
"tmp_unpack_uint_to_ivec2_i2");
factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)),
WRITEMASK_X));
factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)),
WRITEMASK_Y));
return deref(i2).val;
}
/**
* \brief Unpack a uint32 into four uint8's.
*
@ -308,13 +363,23 @@ private:
/* u4.x = u & 0xffu; */
factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));
/* u4.y = (u >> 8u) & 0xffu; */
factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
constant(0xffu)), WRITEMASK_Y));
if (op_mask & LOWER_PACK_USE_BFE) {
/* u4.y = bitfield_extract(u, 8, 8); */
factory.emit(assign(u4, bitfield_extract(u, constant(8), constant(8)),
WRITEMASK_Y));
/* u4.z = (u >> 16u) & 0xffu; */
factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
constant(0xffu)), WRITEMASK_Z));
/* u4.z = bitfield_extract(u, 16, 8); */
factory.emit(assign(u4, bitfield_extract(u, constant(16), constant(8)),
WRITEMASK_Z));
} else {
/* u4.y = (u >> 8u) & 0xffu; */
factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
constant(0xffu)), WRITEMASK_Y));
/* u4.z = (u >> 16u) & 0xffu; */
factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
constant(0xffu)), WRITEMASK_Z));
}
/* u4.w = (u >> 24u) */
factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));
@ -322,6 +387,43 @@ private:
return deref(u4).val;
}
/**
* \brief Unpack a uint32 into four int8's.
*
* Specifically each 8-bit value is sign-extended to the full width of an
* int32 on return.
*/
ir_rvalue *
unpack_uint_to_ivec4(ir_rvalue *uint_rval)
{
assert(uint_rval->type == glsl_type::uint_type);
if (!(op_mask & LOWER_PACK_USE_BFE)) {
return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
constant(24u)),
constant(24u));
}
ir_variable *i = factory.make_temp(glsl_type::int_type,
"tmp_unpack_uint_to_ivec4_i");
factory.emit(assign(i, u2i(uint_rval)));
/* ivec4 i4; */
ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type,
"tmp_unpack_uint_to_ivec4_i4");
factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)),
WRITEMASK_X));
factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)),
WRITEMASK_Y));
factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)),
WRITEMASK_Z));
factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)),
WRITEMASK_W));
return deref(i4).val;
}
/**
* \brief Lower a packSnorm2x16 expression.
*
@ -468,9 +570,7 @@ private:
assert(uint_rval->type == glsl_type::uint_type);
ir_rvalue *result =
clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)),
constant(16)),
constant(16u))),
clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)),
constant(32767.0f)),
constant(-1.0f),
constant(1.0f));
@ -527,9 +627,7 @@ private:
assert(uint_rval->type == glsl_type::uint_type);
ir_rvalue *result =
clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
constant(24u)),
constant(24u))),
clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)),
constant(127.0f)),
constant(-1.0f),
constant(1.0f));

View file

@ -664,102 +664,51 @@ add_defs_uses(nir_instr *instr)
}
void
nir_instr_insert_before(nir_instr *instr, nir_instr *before)
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
assert(before->type != nir_instr_type_jump);
before->block = instr->block;
add_defs_uses(before);
exec_node_insert_node_before(&instr->node, &before->node);
}
switch (cursor.option) {
case nir_cursor_before_block:
/* Only allow inserting jumps into empty blocks. */
if (instr->type == nir_instr_type_jump)
assert(exec_list_is_empty(&cursor.block->instr_list));
void
nir_instr_insert_after(nir_instr *instr, nir_instr *after)
{
if (after->type == nir_instr_type_jump) {
assert(instr == nir_block_last_instr(instr->block));
instr->block = cursor.block;
add_defs_uses(instr);
exec_list_push_head(&cursor.block->instr_list, &instr->node);
break;
case nir_cursor_after_block: {
/* Inserting instructions after a jump is illegal. */
nir_instr *last = nir_block_last_instr(cursor.block);
assert(last == NULL || last->type != nir_instr_type_jump);
(void) last;
instr->block = cursor.block;
add_defs_uses(instr);
exec_list_push_tail(&cursor.block->instr_list, &instr->node);
break;
}
case nir_cursor_before_instr:
assert(instr->type != nir_instr_type_jump);
instr->block = cursor.instr->block;
add_defs_uses(instr);
exec_node_insert_node_before(&cursor.instr->node, &instr->node);
break;
case nir_cursor_after_instr:
/* Inserting instructions after a jump is illegal. */
assert(cursor.instr->type != nir_instr_type_jump);
/* Only allow inserting jumps at the end of the block. */
if (instr->type == nir_instr_type_jump)
assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
instr->block = cursor.instr->block;
add_defs_uses(instr);
exec_node_insert_after(&cursor.instr->node, &instr->node);
break;
}
after->block = instr->block;
add_defs_uses(after);
exec_node_insert_after(&instr->node, &after->node);
if (after->type == nir_instr_type_jump)
nir_handle_add_jump(after->block);
}
void
nir_instr_insert_before_block(nir_block *block, nir_instr *before)
{
if (before->type == nir_instr_type_jump)
assert(exec_list_is_empty(&block->instr_list));
before->block = block;
add_defs_uses(before);
exec_list_push_head(&block->instr_list, &before->node);
if (before->type == nir_instr_type_jump)
nir_handle_add_jump(block);
}
void
nir_instr_insert_after_block(nir_block *block, nir_instr *after)
{
if (after->type == nir_instr_type_jump) {
assert(exec_list_is_empty(&block->instr_list) ||
nir_block_last_instr(block)->type != nir_instr_type_jump);
}
after->block = block;
add_defs_uses(after);
exec_list_push_tail(&block->instr_list, &after->node);
if (after->type == nir_instr_type_jump)
nir_handle_add_jump(block);
}
void
nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
{
if (node->type == nir_cf_node_block) {
nir_instr_insert_before_block(nir_cf_node_as_block(node), before);
} else {
nir_cf_node *prev = nir_cf_node_prev(node);
assert(prev->type == nir_cf_node_block);
nir_block *prev_block = nir_cf_node_as_block(prev);
nir_instr_insert_before_block(prev_block, before);
}
}
void
nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
{
if (node->type == nir_cf_node_block) {
nir_instr_insert_after_block(nir_cf_node_as_block(node), after);
} else {
nir_cf_node *next = nir_cf_node_next(node);
assert(next->type == nir_cf_node_block);
nir_block *next_block = nir_cf_node_as_block(next);
nir_instr_insert_before_block(next_block, after);
}
}
void
nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
{
nir_cf_node *first_node = exec_node_data(nir_cf_node,
exec_list_get_head(list), node);
nir_instr_insert_before_cf(first_node, before);
}
void
nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
{
nir_cf_node *last_node = exec_node_data(nir_cf_node,
exec_list_get_tail(list), node);
nir_instr_insert_after_cf(last_node, after);
if (instr->type == nir_instr_type_jump)
nir_handle_add_jump(instr->block);
}
static bool

View file

@ -1567,20 +1567,182 @@ nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);
nir_load_const_instr *
nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref);
void nir_instr_insert_before(nir_instr *instr, nir_instr *before);
void nir_instr_insert_after(nir_instr *instr, nir_instr *after);
/**
* NIR Cursors and Instruction Insertion API
* @{
*
* A tiny struct representing a point to insert/extract instructions or
* control flow nodes. Helps reduce the combinatorial explosion of possible
* points to insert/extract.
*
* \sa nir_control_flow.h
*/
typedef enum {
nir_cursor_before_block,
nir_cursor_after_block,
nir_cursor_before_instr,
nir_cursor_after_instr,
} nir_cursor_option;
void nir_instr_insert_before_block(nir_block *block, nir_instr *before);
void nir_instr_insert_after_block(nir_block *block, nir_instr *after);
typedef struct {
nir_cursor_option option;
union {
nir_block *block;
nir_instr *instr;
};
} nir_cursor;
void nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before);
void nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after);
static inline nir_block *
nir_cursor_current_block(nir_cursor cursor)
{
if (cursor.option == nir_cursor_before_instr ||
cursor.option == nir_cursor_after_instr) {
return cursor.instr->block;
} else {
return cursor.block;
}
}
void nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before);
void nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after);
static inline nir_cursor
nir_before_block(nir_block *block)
{
nir_cursor cursor;
cursor.option = nir_cursor_before_block;
cursor.block = block;
return cursor;
}
static inline nir_cursor
nir_after_block(nir_block *block)
{
nir_cursor cursor;
cursor.option = nir_cursor_after_block;
cursor.block = block;
return cursor;
}
static inline nir_cursor
nir_before_instr(nir_instr *instr)
{
nir_cursor cursor;
cursor.option = nir_cursor_before_instr;
cursor.instr = instr;
return cursor;
}
static inline nir_cursor
nir_after_instr(nir_instr *instr)
{
nir_cursor cursor;
cursor.option = nir_cursor_after_instr;
cursor.instr = instr;
return cursor;
}
static inline nir_cursor
nir_after_block_before_jump(nir_block *block)
{
nir_instr *last_instr = nir_block_last_instr(block);
if (last_instr && last_instr->type == nir_instr_type_jump) {
return nir_before_instr(last_instr);
} else {
return nir_after_block(block);
}
}
static inline nir_cursor
nir_before_cf_node(nir_cf_node *node)
{
if (node->type == nir_cf_node_block)
return nir_before_block(nir_cf_node_as_block(node));
return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
}
static inline nir_cursor
nir_after_cf_node(nir_cf_node *node)
{
if (node->type == nir_cf_node_block)
return nir_after_block(nir_cf_node_as_block(node));
return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
}
static inline nir_cursor
nir_before_cf_list(struct exec_list *cf_list)
{
nir_cf_node *first_node = exec_node_data(nir_cf_node,
exec_list_get_head(cf_list), node);
return nir_before_cf_node(first_node);
}
static inline nir_cursor
nir_after_cf_list(struct exec_list *cf_list)
{
nir_cf_node *last_node = exec_node_data(nir_cf_node,
exec_list_get_tail(cf_list), node);
return nir_after_cf_node(last_node);
}
/**
* Insert a NIR instruction at the given cursor.
*
* Note: This does not update the cursor.
*/
void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
static inline void
nir_instr_insert_before(nir_instr *instr, nir_instr *before)
{
nir_instr_insert(nir_before_instr(instr), before);
}
static inline void
nir_instr_insert_after(nir_instr *instr, nir_instr *after)
{
nir_instr_insert(nir_after_instr(instr), after);
}
static inline void
nir_instr_insert_before_block(nir_block *block, nir_instr *before)
{
nir_instr_insert(nir_before_block(block), before);
}
static inline void
nir_instr_insert_after_block(nir_block *block, nir_instr *after)
{
nir_instr_insert(nir_after_block(block), after);
}
static inline void
nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
{
nir_instr_insert(nir_before_cf_node(node), before);
}
static inline void
nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
{
nir_instr_insert(nir_after_cf_node(node), after);
}
static inline void
nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
{
nir_instr_insert(nir_before_cf_list(list), before);
}
static inline void
nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
{
nir_instr_insert(nir_after_cf_list(list), after);
}
void nir_instr_remove(nir_instr *instr);
/** @} */
typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);

View file

@ -24,16 +24,12 @@
#ifndef NIR_BUILDER_H
#define NIR_BUILDER_H
#include "nir_control_flow.h"
struct exec_list;
typedef struct nir_builder {
struct exec_list *cf_node_list;
nir_block *before_block;
nir_block *after_block;
nir_instr *before_instr;
nir_instr *after_instr;
nir_cursor cursor;
nir_shader *shader;
nir_function_impl *impl;
@ -47,75 +43,20 @@ nir_builder_init(nir_builder *build, nir_function_impl *impl)
build->shader = impl->overload->function->shader;
}
static inline void
nir_builder_insert_after_cf_list(nir_builder *build,
struct exec_list *cf_node_list)
{
build->cf_node_list = cf_node_list;
build->before_block = NULL;
build->after_block = NULL;
build->before_instr = NULL;
build->after_instr = NULL;
}
static inline void
nir_builder_insert_before_block(nir_builder *build,
nir_block *block)
{
build->cf_node_list = NULL;
build->before_block = block;
build->after_block = NULL;
build->before_instr = NULL;
build->after_instr = NULL;
}
static inline void
nir_builder_insert_after_block(nir_builder *build,
nir_block *block)
{
build->cf_node_list = NULL;
build->before_block = NULL;
build->after_block = block;
build->before_instr = NULL;
build->after_instr = NULL;
}
static inline void
nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr)
{
build->cf_node_list = NULL;
build->before_block = NULL;
build->after_block = NULL;
build->before_instr = before_instr;
build->after_instr = NULL;
}
static inline void
nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr)
{
build->cf_node_list = NULL;
build->before_block = NULL;
build->after_block = NULL;
build->before_instr = NULL;
build->after_instr = after_instr;
}
static inline void
nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
{
if (build->cf_node_list) {
nir_instr_insert_after_cf_list(build->cf_node_list, instr);
} else if (build->before_block) {
nir_instr_insert_before_block(build->before_block, instr);
} else if (build->after_block) {
nir_instr_insert_after_block(build->after_block, instr);
} else if (build->before_instr) {
nir_instr_insert_before(build->before_instr, instr);
} else {
assert(build->after_instr);
nir_instr_insert_after(build->after_instr, instr);
build->after_instr = instr;
}
nir_instr_insert(build->cursor, instr);
/* Move the cursor forward. */
if (build->cursor.option == nir_cursor_after_instr)
build->cursor.instr = instr;
}
static inline void
nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf)
{
nir_cf_node_insert(build->cursor, cf);
}
static inline nir_ssa_def *

View file

@ -45,95 +45,6 @@ extern "C" {
* deleting them.
*/
/* Helper struct for representing a point to extract/insert. Helps reduce the
* combinatorial explosion of possible points to extract.
*/
typedef enum {
nir_cursor_before_block,
nir_cursor_after_block,
nir_cursor_before_instr,
nir_cursor_after_instr,
} nir_cursor_option;
typedef struct {
nir_cursor_option option;
union {
nir_block *block;
nir_instr *instr;
};
} nir_cursor;
static inline nir_cursor
nir_before_block(nir_block *block)
{
nir_cursor cursor;
cursor.option = nir_cursor_before_block;
cursor.block = block;
return cursor;
}
static inline nir_cursor
nir_after_block(nir_block *block)
{
nir_cursor cursor;
cursor.option = nir_cursor_after_block;
cursor.block = block;
return cursor;
}
static inline nir_cursor
nir_before_instr(nir_instr *instr)
{
nir_cursor cursor;
cursor.option = nir_cursor_before_instr;
cursor.instr = instr;
return cursor;
}
static inline nir_cursor
nir_after_instr(nir_instr *instr)
{
nir_cursor cursor;
cursor.option = nir_cursor_after_instr;
cursor.instr = instr;
return cursor;
}
/** Returns a cursor positioned just before the given control-flow node. */
static inline nir_cursor
nir_before_cf_node(nir_cf_node *node)
{
   /* Only block nodes hold instructions directly.  An if or loop node is
    * always preceded by a block, so "before" it means after that block.
    */
   if (node->type != nir_cf_node_block)
      return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));

   return nir_before_block(nir_cf_node_as_block(node));
}
/** Returns a cursor positioned just after the given control-flow node. */
static inline nir_cursor
nir_after_cf_node(nir_cf_node *node)
{
   /* An if or loop node is always followed by a block, so "after" it means
    * before that following block.
    */
   if (node->type != nir_cf_node_block)
      return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));

   return nir_after_block(nir_cf_node_as_block(node));
}
/** Returns a cursor before the first control-flow node in \p cf_list. */
static inline nir_cursor
nir_before_cf_list(struct exec_list *cf_list)
{
   return nir_before_cf_node(exec_node_data(nir_cf_node,
                                            exec_list_get_head(cf_list),
                                            node));
}
/** Returns a cursor after the last control-flow node in \p cf_list. */
static inline nir_cursor
nir_after_cf_list(struct exec_list *cf_list)
{
   return nir_after_cf_node(exec_node_data(nir_cf_node,
                                           exec_list_get_tail(cf_list),
                                           node));
}
/** Control flow insertion. */
/** puts a control flow node where the cursor is */

View file

@ -50,7 +50,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
is_signed = (op == nir_op_idiv);
nir_builder_insert_before_instr(bld, &alu->instr);
bld->cursor = nir_before_instr(&alu->instr);
numer = nir_ssa_for_src(bld, alu->src[0].src,
nir_ssa_alu_instr_src_components(alu, 0));

View file

@ -84,7 +84,7 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
unsigned base_offset = 0;
nir_builder *b = &state->builder;
nir_builder_insert_before_instr(b, instr);
b->cursor = nir_before_instr(instr);
nir_deref *tail = &deref->deref;
while (tail->child != NULL) {

View file

@ -43,7 +43,7 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
nir_builder b;
nir_builder_init(&b, nir_cf_node_get_function(&lower->instr.block->cf_node));
nir_builder_insert_before_instr(&b, &lower->instr);
b.cursor = nir_before_instr(&lower->instr);
/* Emit the individual loads. */
nir_ssa_def *loads[4];

View file

@ -46,7 +46,7 @@ nir_lower_tex_projector_block(nir_block *block, void *void_state)
continue;
nir_tex_instr *tex = nir_instr_as_tex(instr);
nir_builder_insert_before_instr(b, &tex->instr);
b->cursor = nir_before_instr(&tex->instr);
/* Find the projector in the srcs list, if present. */
int proj_index;

View file

@ -52,7 +52,7 @@ normalize_cubemap_coords_block(nir_block *block, void *void_state)
if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
continue;
nir_builder_insert_before_instr(b, &tex->instr);
b->cursor = nir_before_instr(&tex->instr);
for (unsigned i = 0; i < tex->num_srcs; i++) {
if (tex->src[i].src_type != nir_tex_src_coord)

View file

@ -2310,7 +2310,7 @@ vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block,
}
}
nir_builder_insert_before_block(&b->nb, block);
b->nb.cursor = nir_before_block(block);
struct vtn_ssa_value *phi = vtn_phi_node_create(b, type);
struct set_entry *entry2;
@ -2569,10 +2569,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block;
assert(block->block == NULL);
struct exec_node *list_tail = exec_list_get_tail(b->nb.cf_node_list);
nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node);
assert(tail_node->type == nir_cf_node_block);
block->block = nir_cf_node_as_block(tail_node);
block->block = nir_cursor_current_block(b->nb.cursor);
break;
}
@ -2754,17 +2751,15 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
vtn_value(b, block->merge_block_id, vtn_value_type_block)->block;
nir_loop *loop = nir_loop_create(b->shader);
nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node);
struct exec_list *old_list = b->nb.cf_node_list;
nir_cf_node_insert(b->nb.cursor, &loop->cf_node);
/* Reset the merge_op to prevent infinite recursion */
block->merge_op = SpvOpNop;
nir_builder_insert_after_cf_list(&b->nb, &loop->body);
b->nb.cursor = nir_after_cf_list(&loop->body);
vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL);
nir_builder_insert_after_cf_list(&b->nb, old_list);
b->nb.cursor = nir_after_cf_node(&loop->cf_node);
block = new_break_block;
continue;
}
@ -2776,10 +2771,8 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
vtn_foreach_instruction(b, block->label, block->branch,
vtn_handle_body_instruction);
nir_cf_node *cur_cf_node =
exec_node_data(nir_cf_node, exec_list_get_tail(b->nb.cf_node_list),
node);
nir_block *cur_block = nir_cf_node_as_block(cur_cf_node);
nir_block *cur_block = nir_cursor_current_block(b->nb.cursor);
assert(cur_block == block->block);
_mesa_hash_table_insert(b->block_table, cur_block, block);
switch (branch_op) {
@ -2824,7 +2817,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
nir_if *if_stmt = nir_if_create(b->shader);
if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
nir_cf_node_insert_end(b->nb.cf_node_list, &if_stmt->cf_node);
nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node);
if (then_block == break_block) {
nir_jump_instr *jump = nir_jump_instr_create(b->shader,
@ -2859,15 +2852,13 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
struct vtn_block *merge_block =
vtn_value(b, block->merge_block_id, vtn_value_type_block)->block;
struct exec_list *old_list = b->nb.cf_node_list;
nir_builder_insert_after_cf_list(&b->nb, &if_stmt->then_list);
b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block);
nir_builder_insert_after_cf_list(&b->nb, &if_stmt->else_list);
b->nb.cursor = nir_after_cf_list(&if_stmt->else_list);
vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block);
nir_builder_insert_after_cf_list(&b->nb, old_list);
b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node);
block = merge_block;
continue;
}
@ -2967,7 +2958,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
_mesa_key_pointer_equal);
nir_builder_init(&b->nb, b->impl);
nir_builder_insert_after_cf_list(&b->nb, &b->impl->body);
b->nb.cursor = nir_after_cf_list(&b->impl->body);
vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL);
vtn_foreach_instruction(b, func->start_block->label, func->end,
vtn_handle_phi_second_pass);

View file

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Print defined gl.* functions not in GL ES 1.1 or in
# (FIXME, none of these should be part of the ABI)

View file

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Print defined gl.* functions not in GL ES 3.0 or in
# (FIXME, none of these should be part of the ABI)

View file

@ -0,0 +1,40 @@
<?xml version="1.0"?>
<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
<OpenGLAPI>
<category name="GL_KHR_texture_compression_astc_ldr" number="118">
<enum name="COMPRESSED_RGBA_ASTC_4x4_KHR" value="0x93B0"/>
<enum name="COMPRESSED_RGBA_ASTC_5x4_KHR" value="0x93B1"/>
<enum name="COMPRESSED_RGBA_ASTC_5x5_KHR" value="0x93B2"/>
<enum name="COMPRESSED_RGBA_ASTC_6x5_KHR" value="0x93B3"/>
<enum name="COMPRESSED_RGBA_ASTC_6x6_KHR" value="0x93B4"/>
<enum name="COMPRESSED_RGBA_ASTC_8x5_KHR" value="0x93B5"/>
<enum name="COMPRESSED_RGBA_ASTC_8x6_KHR" value="0x93B6"/>
<enum name="COMPRESSED_RGBA_ASTC_8x8_KHR" value="0x93B7"/>
<enum name="COMPRESSED_RGBA_ASTC_10x5_KHR" value="0x93B8"/>
<enum name="COMPRESSED_RGBA_ASTC_10x6_KHR" value="0x93B9"/>
<enum name="COMPRESSED_RGBA_ASTC_10x8_KHR" value="0x93BA"/>
<enum name="COMPRESSED_RGBA_ASTC_10x10_KHR" value="0x93BB"/>
<enum name="COMPRESSED_RGBA_ASTC_12x10_KHR" value="0x93BC"/>
<enum name="COMPRESSED_RGBA_ASTC_12x12_KHR" value="0x93BD"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR" value="0x93D0"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR" value="0x93D1"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR" value="0x93D2"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR" value="0x93D3"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR" value="0x93D4"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR" value="0x93D5"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR" value="0x93D6"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR" value="0x93D7"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR" value="0x93D8"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR" value="0x93D9"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR" value="0x93DA"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR" value="0x93DB"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR" value="0x93DC"/>
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR" value="0x93DD"/>
</category>
</OpenGLAPI>

View file

@ -190,6 +190,7 @@ API_XML = \
INTEL_performance_query.xml \
KHR_debug.xml \
KHR_context_flush_control.xml \
KHR_texture_compression_astc.xml \
NV_conditional_render.xml \
NV_primitive_restart.xml \
NV_texture_barrier.xml \

View file

@ -798,4 +798,23 @@
</function>
</category>
<!-- 174. GL_OES_texture_storage_multisample_2d_array -->
<category name="GL_OES_texture_storage_multisample_2d_array" number="174">
<enum name="TEXTURE_2D_MULTISAMPLE_ARRAY_OES" value="0x9102"/>
<enum name="TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY_OES" value="0x9105"/>
<enum name="SAMPLER_2D_MULTISAMPLE_ARRAY_OES" value="0x910B"/>
<enum name="INT_SAMPLER_2D_MULTISAMPLE_ARRAY_OES" value="0x910C"/>
<enum name="UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY_OES" value="0x910D"/>
<function name="TexStorage3DMultisampleOES" alias="TexStorage3DMultisample" es2="3.1">
<param name="target" type="GLenum"/>
<param name="samples" type="GLsizei"/>
<param name="internalformat" type="GLenum"/>
<param name="width" type="GLsizei"/>
<param name="height" type="GLsizei"/>
<param name="depth" type="GLsizei"/>
<param name="fixedsamplelocations" type="GLboolean"/>
</function>
</category>
</OpenGLAPI>

View file

@ -8168,7 +8168,7 @@
<xi:include href="ARB_texture_storage.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<!-- ARB extension #118 -->
<xi:include href="KHR_texture_compression_astc.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<xi:include href="KHR_debug.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>

View file

@ -144,12 +144,6 @@ class PrintGenericStubs(gl_XML.gl_print_base):
print ''
print '#ifdef GLX_USE_TLS'
print ''
print '\t.globl _x86_64_get_get_dispatch; HIDDEN(_x86_64_get_get_dispatch)'
print '_x86_64_get_get_dispatch:'
print '\tlea\t_x86_64_get_dispatch(%rip), %rax'
print '\tret'
print ''
print '\t.p2align\t4,,15'
print '_x86_64_get_dispatch:'
print '\tmovq\t_glapi_tls_Dispatch@GOTTPOFF(%rip), %rax'
print '\tmovq\t%fs:(%rax), %rax'

View file

@ -278,6 +278,7 @@
#define GEN8_SURFACE_TILING_W (1 << 12)
#define GEN8_SURFACE_TILING_X (2 << 12)
#define GEN8_SURFACE_TILING_Y (3 << 12)
#define GEN8_SURFACE_SAMPLER_L2_BYPASS_DISABLE (1 << 9)
#define BRW_SURFACE_RC_READ_WRITE (1 << 8)
#define BRW_SURFACE_MIPLAYOUT_SHIFT 10
#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
@ -506,6 +507,38 @@
#define BRW_SURFACEFORMAT_R8G8B8_UINT 0x1C8
#define BRW_SURFACEFORMAT_R8G8B8_SINT 0x1C9
#define BRW_SURFACEFORMAT_RAW 0x1FF
#define GEN9_SURFACE_ASTC_HDR_FORMAT_BIT 0x100
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_U8sRGB 0x200
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_U8sRGB 0x208
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_U8sRGB 0x209
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_U8sRGB 0x211
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_U8sRGB 0x212
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_U8sRGB 0x221
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_U8sRGB 0x222
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_U8sRGB 0x224
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_U8sRGB 0x231
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_U8sRGB 0x232
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_U8sRGB 0x234
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_U8sRGB 0x236
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_U8sRGB 0x23E
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_U8sRGB 0x23F
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_FLT16 0x240
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_FLT16 0x248
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_FLT16 0x249
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_FLT16 0x251
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_FLT16 0x252
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_FLT16 0x261
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_FLT16 0x262
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_FLT16 0x264
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_FLT16 0x271
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_FLT16 0x272
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_FLT16 0x274
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_FLT16 0x276
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_FLT16 0x27E
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_FLT16 0x27F
#define BRW_SURFACE_FORMAT_SHIFT 18
#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)

View file

@ -427,7 +427,9 @@ fs_reg::equals(const fs_reg &r) const
negate == r.negate &&
abs == r.abs &&
!reladdr && !r.reladdr &&
memcmp(&fixed_hw_reg, &r.fixed_hw_reg, sizeof(fixed_hw_reg)) == 0 &&
((file != HW_REG && file != IMM) ||
memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
sizeof(fixed_hw_reg)) == 0) &&
stride == r.stride);
}
@ -1789,54 +1791,46 @@ fs_visitor::assign_constant_locations()
if (dispatch_width != 8)
return;
unsigned int num_pull_constants = 0;
pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
memset(pull_constant_loc, -1, sizeof(pull_constant_loc[0]) * uniforms);
/* Walk through and find array access of uniforms. Put a copy of that
* uniform in the pull constant buffer.
bool is_live[uniforms];
memset(is_live, 0, sizeof(is_live));
/* First, we walk through the instructions and do two things:
*
* 1) Figure out which uniforms are live.
*
* 2) Find all indirect access of uniform arrays and flag them as needing
* to go into the pull constant buffer.
*
* Note that we don't move constant-indexed accesses to arrays. No
* testing has been done of the performance impact of this choice.
*/
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
for (int i = 0 ; i < inst->sources; i++) {
if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
continue;
int uniform = inst->src[i].reg;
/* If this array isn't already present in the pull constant buffer,
* add it.
*/
if (pull_constant_loc[uniform] == -1) {
const gl_constant_value **values = &stage_prog_data->param[uniform];
assert(param_size[uniform]);
for (int j = 0; j < param_size[uniform]; j++) {
pull_constant_loc[uniform + j] = stage_prog_data->nr_pull_params;
stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] =
values[j];
}
}
}
}
/* Find which UNIFORM registers are still in use. */
bool is_live[uniforms];
for (unsigned int i = 0; i < uniforms; i++) {
is_live[i] = false;
}
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file != UNIFORM)
continue;
int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
if (constant_nr >= 0 && constant_nr < (int) uniforms)
is_live[constant_nr] = true;
if (inst->src[i].reladdr) {
int uniform = inst->src[i].reg;
/* If this array isn't already present in the pull constant buffer,
* add it.
*/
if (pull_constant_loc[uniform] == -1) {
assert(param_size[uniform]);
for (int j = 0; j < param_size[uniform]; j++)
pull_constant_loc[uniform + j] = num_pull_constants++;
}
} else {
/* Mark the one accessed uniform as live */
int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
if (constant_nr >= 0 && constant_nr < (int) uniforms)
is_live[constant_nr] = true;
}
}
}
@ -1870,27 +1864,29 @@ fs_visitor::assign_constant_locations()
} else {
/* Demote to a pull constant. */
push_constant_loc[i] = -1;
int pull_index = stage_prog_data->nr_pull_params++;
stage_prog_data->pull_param[pull_index] = stage_prog_data->param[i];
pull_constant_loc[i] = pull_index;
pull_constant_loc[i] = num_pull_constants++;
}
}
stage_prog_data->nr_params = num_push_constants;
stage_prog_data->nr_pull_params = num_pull_constants;
/* Up until now, the param[] array has been indexed by reg + reg_offset
* of UNIFORM registers. Condense it to only contain the uniforms we
* chose to upload as push constants.
* of UNIFORM registers. Move pull constants into pull_param[] and
* condense param[] to only contain the uniforms we chose to push.
*
* NOTE: Because we are condensing the params[] array, we know that
* push_constant_loc[i] <= i and we can do it in one smooth loop without
* having to make a copy.
*/
for (unsigned int i = 0; i < uniforms; i++) {
int remapped = push_constant_loc[i];
const gl_constant_value *value = stage_prog_data->param[i];
if (remapped == -1)
continue;
assert(remapped <= (int)i);
stage_prog_data->param[remapped] = stage_prog_data->param[i];
if (pull_constant_loc[i] != -1) {
stage_prog_data->pull_param[pull_constant_loc[i]] = value;
} else if (push_constant_loc[i] != -1) {
stage_prog_data->param[push_constant_loc[i]] = value;
}
}
}
@ -4806,11 +4802,11 @@ fs_visitor::optimize()
*/
bld = fs_builder(this, 64);
split_virtual_grfs();
assign_constant_locations();
demote_pull_constants();
split_virtual_grfs();
#define OPT(pass, args...) ({ \
pass_num++; \
bool this_progress = pass(args); \

View file

@ -225,7 +225,6 @@ public:
void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
fs_reg resolve_source_modifiers(const fs_reg &src);
void emit_discard_jump();
bool try_replace_with_sel();
bool opt_peephole_sel();
bool opt_peephole_predicated_break();
bool opt_saturate_propagation();

View file

@ -372,6 +372,8 @@ namespace brw {
emit_minmax(const dst_reg &dst, const src_reg &src0,
const src_reg &src1, brw_conditional_mod mod) const
{
assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
if (shader->devinfo->gen >= 6) {
set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
fix_unsigned_negate(src1)));

View file

@ -132,7 +132,7 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
switch (stage) {
case MESA_SHADER_VERTEX:
for (int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) {
for (unsigned int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) {
int output = var->data.location + i;
this->outputs[output] = offset(reg, bld, 4 * i);
this->output_components[output] = vector_elements;
@ -191,8 +191,8 @@ fs_visitor::nir_setup_uniforms(nir_shader *shader)
nir_setup_builtin_uniform(var);
else
nir_setup_uniform(var);
param_size[var->data.driver_location] = type_size_scalar(var->type);
if(type_size_scalar(var->type) > 0)
param_size[var->data.driver_location] = type_size_scalar(var->type);
}
} else {
/* prog_to_nir only creates a single giant uniform variable so we can
@ -203,7 +203,8 @@ fs_visitor::nir_setup_uniforms(nir_shader *shader)
&prog->Parameters->ParameterValues[p][i];
}
}
param_size[0] = prog->Parameters->NumParameters * 4;
if(prog->Parameters->NumParameters > 0)
param_size[0] = prog->Parameters->NumParameters * 4;
}
}
@ -416,8 +417,6 @@ fs_visitor::nir_emit_if(nir_if *if_stmt)
nir_emit_cf_list(&if_stmt->else_list);
bld.emit(BRW_OPCODE_ENDIF);
try_replace_with_sel();
}
void

View file

@ -95,42 +95,51 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
static bool
can_coalesce_vars(brw::fs_live_variables *live_intervals,
const cfg_t *cfg, const fs_inst *inst,
int var_to, int var_from)
int dst_var, int src_var)
{
if (!live_intervals->vars_interfere(var_from, var_to))
if (!live_intervals->vars_interfere(src_var, dst_var))
return true;
int start_to = live_intervals->start[var_to];
int end_to = live_intervals->end[var_to];
int start_from = live_intervals->start[var_from];
int end_from = live_intervals->end[var_from];
int dst_start = live_intervals->start[dst_var];
int dst_end = live_intervals->end[dst_var];
int src_start = live_intervals->start[src_var];
int src_end = live_intervals->end[src_var];
/* Variables interfere and one line range isn't a subset of the other. */
if ((end_to > end_from && start_from < start_to) ||
(end_from > end_to && start_to < start_from))
if ((dst_end > src_end && src_start < dst_start) ||
(src_end > dst_end && dst_start < src_start))
return false;
int start_ip = MIN2(start_to, start_from);
int scan_ip = -1;
/* Check for a write to either register in the intersection of their live
* ranges.
*/
int start_ip = MAX2(dst_start, src_start);
int end_ip = MIN2(dst_end, src_end);
foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
scan_ip++;
if (scan_ip < start_ip)
foreach_block(block, cfg) {
if (block->end_ip < start_ip)
continue;
if (scan_inst->is_control_flow())
return false;
int scan_ip = block->start_ip - 1;
if (scan_ip <= live_intervals->start[var_to])
continue;
foreach_inst_in_block(fs_inst, scan_inst, block) {
scan_ip++;
if (scan_ip > live_intervals->end[var_to])
return true;
/* Ignore anything before the intersection of the live ranges */
if (scan_ip < start_ip)
continue;
if (scan_inst->dst.equals(inst->dst) ||
scan_inst->dst.equals(inst->src[0]))
return false;
/* Ignore the copying instruction itself */
if (scan_inst == inst)
continue;
if (scan_ip > end_ip)
return true; /* registers do not interfere */
if (scan_inst->overwrites_reg(inst->dst) ||
scan_inst->overwrites_reg(inst->src[0]))
return false; /* registers interfere */
}
}
return true;
@ -145,11 +154,11 @@ fs_visitor::register_coalesce()
int src_size = 0;
int channels_remaining = 0;
int reg_from = -1, reg_to = -1;
int reg_to_offset[MAX_VGRF_SIZE];
int src_reg = -1, dst_reg = -1;
int dst_reg_offset[MAX_VGRF_SIZE];
fs_inst *mov[MAX_VGRF_SIZE];
int var_to[MAX_VGRF_SIZE];
int var_from[MAX_VGRF_SIZE];
int dst_var[MAX_VGRF_SIZE];
int src_var[MAX_VGRF_SIZE];
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (!is_coalesce_candidate(this, inst))
@ -161,8 +170,8 @@ fs_visitor::register_coalesce()
continue;
}
if (reg_from != inst->src[0].reg) {
reg_from = inst->src[0].reg;
if (src_reg != inst->src[0].reg) {
src_reg = inst->src[0].reg;
src_size = alloc.sizes[inst->src[0].reg];
assert(src_size <= MAX_VGRF_SIZE);
@ -170,15 +179,15 @@ fs_visitor::register_coalesce()
channels_remaining = src_size;
memset(mov, 0, sizeof(mov));
reg_to = inst->dst.reg;
dst_reg = inst->dst.reg;
}
if (reg_to != inst->dst.reg)
if (dst_reg != inst->dst.reg)
continue;
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
for (int i = 0; i < src_size; i++) {
reg_to_offset[i] = i;
dst_reg_offset[i] = i;
}
mov[0] = inst;
channels_remaining -= inst->regs_written;
@ -194,9 +203,9 @@ fs_visitor::register_coalesce()
channels_remaining = -1;
continue;
}
reg_to_offset[offset] = inst->dst.reg_offset;
dst_reg_offset[offset] = inst->dst.reg_offset;
if (inst->regs_written > 1)
reg_to_offset[offset + 1] = inst->dst.reg_offset + 1;
dst_reg_offset[offset + 1] = inst->dst.reg_offset + 1;
mov[offset] = inst;
channels_remaining -= inst->regs_written;
}
@ -206,20 +215,20 @@ fs_visitor::register_coalesce()
bool can_coalesce = true;
for (int i = 0; i < src_size; i++) {
if (reg_to_offset[i] != reg_to_offset[0] + i) {
if (dst_reg_offset[i] != dst_reg_offset[0] + i) {
/* Registers are out-of-order. */
can_coalesce = false;
reg_from = -1;
src_reg = -1;
break;
}
var_to[i] = live_intervals->var_from_vgrf[reg_to] + reg_to_offset[i];
var_from[i] = live_intervals->var_from_vgrf[reg_from] + i;
dst_var[i] = live_intervals->var_from_vgrf[dst_reg] + dst_reg_offset[i];
src_var[i] = live_intervals->var_from_vgrf[src_reg] + i;
if (!can_coalesce_vars(live_intervals, cfg, inst,
var_to[i], var_from[i])) {
dst_var[i], src_var[i])) {
can_coalesce = false;
reg_from = -1;
src_reg = -1;
break;
}
}
@ -242,31 +251,31 @@ fs_visitor::register_coalesce()
foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
if (scan_inst->dst.file == GRF &&
scan_inst->dst.reg == reg_from) {
scan_inst->dst.reg = reg_to;
scan_inst->dst.reg == src_reg) {
scan_inst->dst.reg = dst_reg;
scan_inst->dst.reg_offset =
reg_to_offset[scan_inst->dst.reg_offset];
dst_reg_offset[scan_inst->dst.reg_offset];
}
for (int j = 0; j < scan_inst->sources; j++) {
if (scan_inst->src[j].file == GRF &&
scan_inst->src[j].reg == reg_from) {
scan_inst->src[j].reg = reg_to;
scan_inst->src[j].reg == src_reg) {
scan_inst->src[j].reg = dst_reg;
scan_inst->src[j].reg_offset =
reg_to_offset[scan_inst->src[j].reg_offset];
dst_reg_offset[scan_inst->src[j].reg_offset];
}
}
}
for (int i = 0; i < src_size; i++) {
live_intervals->start[var_to[i]] =
MIN2(live_intervals->start[var_to[i]],
live_intervals->start[var_from[i]]);
live_intervals->end[var_to[i]] =
MAX2(live_intervals->end[var_to[i]],
live_intervals->end[var_from[i]]);
live_intervals->start[dst_var[i]] =
MIN2(live_intervals->start[dst_var[i]],
live_intervals->start[src_var[i]]);
live_intervals->end[dst_var[i]] =
MAX2(live_intervals->end[dst_var[i]],
live_intervals->end[src_var[i]]);
}
reg_from = -1;
src_reg = -1;
}
if (progress) {

View file

@ -686,7 +686,7 @@ namespace {
if (is_signed)
bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
fs_reg(-(int)scale(widths[c] - s) - 1),
BRW_CONDITIONAL_G);
BRW_CONDITIONAL_GE);
}
}
@ -717,7 +717,7 @@ namespace {
if (is_signed)
bld.emit_minmax(offset(dst, bld, c),
offset(dst, bld, c), fs_reg(-1.0f),
BRW_CONDITIONAL_G);
BRW_CONDITIONAL_GE);
}
}
return dst;
@ -741,7 +741,7 @@ namespace {
/* Clamp the normalized floating-point argument. */
if (is_signed) {
bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c),
fs_reg(-1.0f), BRW_CONDITIONAL_G);
fs_reg(-1.0f), BRW_CONDITIONAL_GE);
bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
fs_reg(1.0f), BRW_CONDITIONAL_L);
@ -812,7 +812,7 @@ namespace {
/* Clamp to the minimum value. */
if (widths[c] < 16)
bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
fs_reg(0.0f), BRW_CONDITIONAL_G);
fs_reg(0.0f), BRW_CONDITIONAL_GE);
/* Convert to 16-bit floating-point. */
bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));

View file

@ -441,95 +441,6 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
}
}
/**
* Try to replace IF/MOV/ELSE/MOV/ENDIF with SEL.
*
* Many GLSL shaders contain the following pattern:
*
* x = condition ? foo : bar
*
* The compiler emits an ir_if tree for this, since each subexpression might be
* a complex tree that could have side-effects or short-circuit logic.
*
* However, the common case is to simply select one of two constants or
* variable values---which is exactly what SEL is for. In this case, the
* assembly looks like:
*
* (+f0) IF
* MOV dst src0
* ELSE
* MOV dst src1
* ENDIF
*
* which can be easily translated into:
*
* (+f0) SEL dst src0 src1
*
* If src0 is an immediate value, we promote it to a temporary GRF.
*/
/* Collapse a trailing IF/MOV/ELSE/MOV/ENDIF sequence at the end of the
 * instruction list into a single predicated SEL.  Returns true on success,
 * false when the tail of the list does not match the pattern.
 */
bool
fs_visitor::try_replace_with_sel()
{
   fs_inst *endif_inst = (fs_inst *) instructions.get_tail();
   assert(endif_inst->opcode == BRW_OPCODE_ENDIF);

   /* Pattern match in reverse: IF, MOV, ELSE, MOV, ENDIF. */
   int opcodes[] = {
      BRW_OPCODE_IF, BRW_OPCODE_MOV, BRW_OPCODE_ELSE, BRW_OPCODE_MOV,
   };

   /* Walk backwards from ENDIF; bail out on a sentinel or opcode mismatch. */
   fs_inst *match = (fs_inst *) endif_inst->prev;
   for (int i = 0; i < 4; i++) {
      if (match->is_head_sentinel() || match->opcode != opcodes[4-i-1])
         return false;
      match = (fs_inst *) match->prev;
   }

   /* The opcodes match; it looks like the right sequence of instructions. */
   fs_inst *else_mov = (fs_inst *) endif_inst->prev;
   fs_inst *then_mov = (fs_inst *) else_mov->prev->prev;
   fs_inst *if_inst = (fs_inst *) then_mov->prev;

   /* Check that the MOVs are the right form. */
   if (then_mov->dst.equals(else_mov->dst) &&
       !then_mov->is_partial_write() &&
       !else_mov->is_partial_write()) {
      /* Remove the matched instructions; we'll emit a SEL to replace them. */
      while (!if_inst->next->is_tail_sentinel())
         if_inst->next->exec_node::remove();
      if_inst->exec_node::remove();

      /* Only the last source register can be a constant, so if the MOV in
       * the "then" clause uses a constant, we need to put it in a temporary.
       */
      fs_reg src0(then_mov->src[0]);
      if (src0.file == IMM) {
         src0 = vgrf(glsl_type::float_type);
         src0.type = then_mov->src[0].type;
         bld.MOV(src0, then_mov->src[0]);
      }

      if (if_inst->conditional_mod) {
         /* Sandybridge-specific IF with embedded comparison */
         bld.CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1],
                 if_inst->conditional_mod);
         set_predicate(BRW_PREDICATE_NORMAL,
                       bld.emit(BRW_OPCODE_SEL, then_mov->dst,
                                src0, else_mov->src[0]));
      } else {
         /* Separate CMP and IF instructions */
         set_predicate_inv(if_inst->predicate, if_inst->predicate_inverse,
                           bld.emit(BRW_OPCODE_SEL, then_mov->dst,
                                    src0, else_mov->src[0]));
      }

      return true;
   }

   return false;
}
/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()

View file

@ -63,6 +63,8 @@ nir_optimize(nir_shader *nir, bool is_scalar)
nir_validate_shader(nir);
progress |= nir_opt_remove_phis(nir);
nir_validate_shader(nir);
progress |= nir_opt_undef(nir);
nir_validate_shader(nir);
} while (progress);
}

View file

@ -307,6 +307,34 @@ const struct surface_format_info surface_formats[] = {
SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8)
SF( x, x, x, x, x, x, x, x, x, R8G8B8_UINT)
SF( x, x, x, x, x, x, x, x, x, R8G8B8_SINT)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_FLT16)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_U8sRGB)
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_U8sRGB)
};
#undef x
#undef Y
@ -503,6 +531,35 @@ brw_format_for_mesa_format(mesa_format mesa_format)
[MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT] = BRW_SURFACEFORMAT_BC6H_SF16,
[MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT] = BRW_SURFACEFORMAT_BC6H_UF16,
[MESA_FORMAT_RGBA_ASTC_4x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_FLT16,
[MESA_FORMAT_RGBA_ASTC_5x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_FLT16,
[MESA_FORMAT_RGBA_ASTC_5x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_FLT16,
[MESA_FORMAT_RGBA_ASTC_6x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_FLT16,
[MESA_FORMAT_RGBA_ASTC_6x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_FLT16,
[MESA_FORMAT_RGBA_ASTC_8x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_FLT16,
[MESA_FORMAT_RGBA_ASTC_8x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_FLT16,
[MESA_FORMAT_RGBA_ASTC_8x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_FLT16,
[MESA_FORMAT_RGBA_ASTC_10x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_FLT16,
[MESA_FORMAT_RGBA_ASTC_10x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_FLT16,
[MESA_FORMAT_RGBA_ASTC_10x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_FLT16,
[MESA_FORMAT_RGBA_ASTC_10x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_FLT16,
[MESA_FORMAT_RGBA_ASTC_12x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_FLT16,
[MESA_FORMAT_RGBA_ASTC_12x12] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_FLT16,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_U8sRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_U8sRGB,
[MESA_FORMAT_A_SNORM8] = 0,
[MESA_FORMAT_L_SNORM8] = 0,
[MESA_FORMAT_L8A8_SNORM] = 0,
@ -768,6 +825,36 @@ translate_tex_format(struct brw_context *brw,
}
return brw_format_for_mesa_format(mesa_format);
case MESA_FORMAT_RGBA_ASTC_4x4:
case MESA_FORMAT_RGBA_ASTC_5x4:
case MESA_FORMAT_RGBA_ASTC_5x5:
case MESA_FORMAT_RGBA_ASTC_6x5:
case MESA_FORMAT_RGBA_ASTC_6x6:
case MESA_FORMAT_RGBA_ASTC_8x5:
case MESA_FORMAT_RGBA_ASTC_8x6:
case MESA_FORMAT_RGBA_ASTC_8x8:
case MESA_FORMAT_RGBA_ASTC_10x5:
case MESA_FORMAT_RGBA_ASTC_10x6:
case MESA_FORMAT_RGBA_ASTC_10x8:
case MESA_FORMAT_RGBA_ASTC_10x10:
case MESA_FORMAT_RGBA_ASTC_12x10:
case MESA_FORMAT_RGBA_ASTC_12x12: {
GLuint brw_fmt = brw_format_for_mesa_format(mesa_format);
/**
* On Gen9+, it is possible to process these formats using the LDR
       * Profile or the Full Profile mode of the hardware. Because it isn't
* possible to determine if an HDR or LDR texture is being rendered, we
* can't determine which mode to enable in the hardware. Therefore, to
* handle all cases, always default to Full profile unless we are
* processing sRGBs, which are incompatible with this mode.
*/
if (brw->gen >= 9)
brw_fmt |= GEN9_SURFACE_ASTC_HDR_FORMAT_BIT;
return brw_fmt;
}
default:
assert(brw_format_for_mesa_format(mesa_format) != 0);
return brw_format_for_mesa_format(mesa_format);

View file

@ -123,12 +123,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw,
return 16;
/**
* From the "Alignment Unit Size" section of various specs, namely:
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
* - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
* - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
* - BSpec (for Ivybridge and slight variations in separate stencil)
*
* +----------------------------------------------------------------------+
* | | alignment unit width ("i") |
* | Surface Property |-----------------------------|
@ -146,32 +140,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw,
* On IVB+, non-special cases can be overridden by setting the SURFACE_STATE
* "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8.
*/
if (_mesa_is_format_compressed(mt->format)) {
/* The hardware alignment requirements for compressed textures
* happen to match the block boundaries.
*/
unsigned int i, j;
_mesa_get_format_block_size(mt->format, &i, &j);
/* On Gen9+ we can pick our own alignment for compressed textures but it
* has to be a multiple of the block size. The minimum alignment we can
* pick is 4 so we effectively have to align to 4 times the block
* size
*/
if (brw->gen >= 9)
return i * 4;
else
return i;
}
if (mt->format == MESA_FORMAT_S_UINT8)
return 8;
if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
uint32_t align = tr_mode_horizontal_texture_alignment(brw, mt);
/* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32. */
return align < 32 ? 32 : align;
}
if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16)
return 8;
@ -248,12 +216,6 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw,
const struct intel_mipmap_tree *mt)
{
/**
* From the "Alignment Unit Size" section of various specs, namely:
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
* - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
* - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
* - BSpec (for Ivybridge and slight variations in separate stencil)
*
* +----------------------------------------------------------------------+
* | | alignment unit height ("j") |
* | Surface Property |-----------------------------|
@ -270,18 +232,6 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw,
* Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of
* the SURFACE_STATE "Surface Vertical Alignment" field.
*/
if (_mesa_is_format_compressed(mt->format))
/* See comment above for the horizontal alignment */
return brw->gen >= 9 ? 16 : 4;
if (mt->format == MESA_FORMAT_S_UINT8)
return brw->gen >= 7 ? 8 : 4;
if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
uint32_t align = tr_mode_vertical_texture_alignment(brw, mt);
/* XY_FAST_COPY_BLT doesn't support vertical alignment < 64 */
return align < 64 ? 64 : align;
}
/* Broadwell only supports VALIGN of 4, 8, and 16. The BSpec says 4
* should always be used, except for stencil buffers, which should be 8.
@ -367,7 +317,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
mt->total_width = mt->physical_width0;
if (mt->compressed)
mt->total_width = ALIGN(mt->total_width, bw);
mt->total_width = ALIGN_NPOT(mt->total_width, bw);
/* May need to adjust width to accommodate the placement of
* the 2nd mipmap. This occurs when the alignment
@ -378,10 +328,10 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
unsigned mip1_width;
if (mt->compressed) {
mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
ALIGN(minify(mt->physical_width0, 2), bw);
mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) +
ALIGN_NPOT(minify(mt->physical_width0, 2), bw);
} else {
mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) +
minify(mt->physical_width0, 2);
}
@ -390,6 +340,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
}
}
mt->total_width /= bw;
mt->total_height = 0;
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
@ -397,7 +348,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
intel_miptree_set_level_info(mt, level, x, y, depth);
img_height = ALIGN(height, mt->align_h);
img_height = ALIGN_NPOT(height, mt->align_h);
if (mt->compressed)
img_height /= bh;
@ -414,7 +365,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
/* Layout_below: step right after second mipmap.
*/
if (level == mt->first_level + 1) {
x += ALIGN(width, mt->align_w);
x += ALIGN_NPOT(width, mt->align_w) / bw;
} else {
y += img_height;
}
@ -434,7 +385,7 @@ brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw,
{
if ((brw->gen < 9 && mt->target == GL_TEXTURE_3D) ||
(brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) {
return ALIGN(minify(mt->physical_width0, level), mt->align_w);
return ALIGN_NPOT(minify(mt->physical_width0, level), mt->align_w);
} else {
return 0;
}
@ -475,11 +426,11 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
} else if (mt->target == GL_TEXTURE_3D ||
(brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP) ||
mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
return ALIGN(minify(mt->physical_height0, level), mt->align_h);
return ALIGN_NPOT(minify(mt->physical_height0, level), mt->align_h);
} else {
const unsigned h0 = ALIGN(mt->physical_height0, mt->align_h);
const unsigned h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h);
const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->align_h);
const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->align_h);
return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h;
}
@ -551,7 +502,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
unsigned img_height;
img_height = ALIGN(height, mt->align_h);
img_height = ALIGN_NPOT(height, mt->align_h);
if (mt->compressed)
img_height /= mt->align_h;
@ -574,18 +525,20 @@ static void
brw_miptree_layout_texture_3d(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
unsigned yscale = mt->compressed ? 4 : 1;
mt->total_width = 0;
mt->total_height = 0;
unsigned ysum = 0;
unsigned bh, bw;
_mesa_get_format_block_size(mt->format, &bw, &bh);
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
unsigned WL = MAX2(mt->physical_width0 >> level, 1);
unsigned HL = MAX2(mt->physical_height0 >> level, 1);
unsigned DL = MAX2(mt->physical_depth0 >> level, 1);
unsigned wL = ALIGN(WL, mt->align_w);
unsigned hL = ALIGN(HL, mt->align_h);
unsigned wL = ALIGN_NPOT(WL, mt->align_w);
unsigned hL = ALIGN_NPOT(HL, mt->align_h);
if (mt->target == GL_TEXTURE_CUBE_MAP)
DL = 6;
@ -596,9 +549,9 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
unsigned x = (q % (1 << level)) * wL;
unsigned y = ysum + (q >> level) * hL;
intel_miptree_set_image_offset(mt, level, q, x, y / yscale);
mt->total_width = MAX2(mt->total_width, x + wL);
mt->total_height = MAX2(mt->total_height, (y + hL) / yscale);
intel_miptree_set_image_offset(mt, level, q, x / bw, y / bh);
mt->total_width = MAX2(mt->total_width, (x + wL) / bw);
mt->total_height = MAX2(mt->total_height, (y + hL) / bh);
}
ysum += ALIGN(DL, 1 << level) / (1 << level) * hL;
@ -767,6 +720,13 @@ intel_miptree_set_alignment(struct brw_context *brw,
struct intel_mipmap_tree *mt,
uint32_t layout_flags)
{
/**
* From the "Alignment Unit Size" section of various specs, namely:
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
* - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
* - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
* - BSpec (for Ivybridge and slight variations in separate stencil)
*/
bool gen6_hiz_or_stencil = false;
if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
@ -798,6 +758,29 @@ intel_miptree_set_alignment(struct brw_context *brw,
mt->align_w = 128 / mt->cpp;
mt->align_h = 32;
}
} else if (mt->compressed) {
/* The hardware alignment requirements for compressed textures
* happen to match the block boundaries.
*/
_mesa_get_format_block_size(mt->format, &mt->align_w, &mt->align_h);
/* On Gen9+ we can pick our own alignment for compressed textures but it
* has to be a multiple of the block size. The minimum alignment we can
* pick is 4 so we effectively have to align to 4 times the block
* size
*/
if (brw->gen >= 9) {
mt->align_w *= 4;
mt->align_h *= 4;
}
} else if (mt->format == MESA_FORMAT_S_UINT8) {
mt->align_w = 8;
mt->align_h = brw->gen >= 7 ? 8 : 4;
} else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
/* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or
* vertical alignment < 64. */
mt->align_w = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32);
mt->align_h = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64);
} else {
mt->align_w =
intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);

View file

@ -216,8 +216,9 @@ dst_reg::equals(const dst_reg &r) const
writemask == r.writemask &&
(reladdr == r.reladdr ||
(reladdr && r.reladdr && reladdr->equals(*r.reladdr))) &&
memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
sizeof(fixed_hw_reg)) == 0);
((file != HW_REG && file != IMM) ||
memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
sizeof(fixed_hw_reg)) == 0));
}
bool

View file

@ -238,6 +238,20 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
}
/* From the CHV PRM, Volume 2d, page 321 (RENDER_SURFACE_STATE dword 0
* bit 9 "Sampler L2 Bypass Mode Disable" Programming Notes):
*
* This bit must be set for the following surface types: BC2_UNORM
* BC3_UNORM BC5_UNORM BC5_SNORM BC7_UNORM
*/
if ((brw->gen >= 9 || brw->is_cherryview) &&
(format == BRW_SURFACEFORMAT_BC2_UNORM ||
format == BRW_SURFACEFORMAT_BC3_UNORM ||
format == BRW_SURFACEFORMAT_BC5_UNORM ||
format == BRW_SURFACEFORMAT_BC5_SNORM ||
format == BRW_SURFACEFORMAT_BC7_UNORM))
surf[0] |= GEN8_SURFACE_SAMPLER_L2_BYPASS_DISABLE;
if (_mesa_is_array_texture(target) || target == GL_TEXTURE_CUBE_MAP)
surf[0] |= GEN8_SURFACE_IS_ARRAY;

View file

@ -41,7 +41,6 @@ copy_image_with_blitter(struct brw_context *brw,
{
GLuint bw, bh;
uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
int cpp;
/* The blitter doesn't understand multisampling at all. */
if (src_mt->num_samples > 0 || dst_mt->num_samples > 0)
@ -86,16 +85,6 @@ copy_image_with_blitter(struct brw_context *brw,
src_y /= (int)bh;
src_width /= (int)bw;
src_height /= (int)bh;
/* Inside of the miptree, the x offsets are stored in pixels while
* the y offsets are stored in blocks. We need to scale just the x
* offset.
*/
src_image_x /= bw;
cpp = _mesa_get_format_bytes(src_mt->format);
} else {
cpp = src_mt->cpp;
}
src_x += src_image_x;
src_y += src_image_y;
@ -111,18 +100,12 @@ copy_image_with_blitter(struct brw_context *brw,
dst_x /= (int)bw;
dst_y /= (int)bh;
/* Inside of the miptree, the x offsets are stored in pixels while
* the y offsets are stored in blocks. We need to scale just the x
* offset.
*/
dst_image_x /= bw;
}
dst_x += dst_image_x;
dst_y += dst_image_y;
return intelEmitCopyBlit(brw,
cpp,
src_mt->cpp,
src_mt->pitch,
src_mt->bo, src_mt->offset,
src_mt->tiling,

View file

@ -68,7 +68,7 @@ static const struct dri_debug_control debug_control[] = {
{ "optimizer", DEBUG_OPTIMIZER },
{ "ann", DEBUG_ANNOTATION },
{ "no8", DEBUG_NO8 },
{ "vec4vs", DEBUG_VEC4VS },
{ "vec4", DEBUG_VEC4VS },
{ "spill", DEBUG_SPILL },
{ "cs", DEBUG_CS },
{ NULL, 0 }

View file

@ -313,15 +313,7 @@ intel_miptree_create_layout(struct brw_context *brw,
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
mt->disable_aux_buffers = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0;
exec_list_make_empty(&mt->hiz_map);
/* The cpp is bytes per (1, blockheight)-sized block for compressed
* textures. This is why you'll see divides by blockheight all over
*/
unsigned bw, bh;
_mesa_get_format_block_size(format, &bw, &bh);
assert(_mesa_get_format_bytes(mt->format) % bw == 0);
mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
mt->cpp = _mesa_get_format_bytes(format);
mt->num_samples = num_samples;
mt->compressed = _mesa_is_format_compressed(format);
mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
@ -1272,8 +1264,8 @@ intel_miptree_copy_slice(struct brw_context *brw,
if (dst_mt->compressed) {
unsigned int i, j;
_mesa_get_format_block_size(dst_mt->format, &i, &j);
height = ALIGN(height, j) / j;
width = ALIGN(width, i);
height = ALIGN_NPOT(height, j) / j;
width = ALIGN_NPOT(width, i) / i;
}
/* If it's a packed depth/stencil buffer with separate stencil, the blit
@ -2105,7 +2097,9 @@ intel_miptree_map_gtt(struct brw_context *brw,
*/
_mesa_get_format_block_size(mt->format, &bw, &bh);
assert(y % bh == 0);
assert(x % bw == 0);
y /= bh;
x /= bw;
base = intel_miptree_map_raw(brw, mt) + mt->offset;

View file

@ -390,7 +390,7 @@ struct intel_mipmap_tree
*/
GLuint physical_width0, physical_height0, physical_depth0;
GLuint cpp; /**< bytes per pixel */
GLuint cpp; /**< bytes per pixel (or bytes per block if compressed) */
GLuint num_samples;
bool compressed;

View file

@ -44,12 +44,6 @@
#define INTEL_UPLOAD_SIZE (64*1024)
/**
* Like ALIGN(), but works with a non-power-of-two alignment.
*/
#define ALIGN_NPOT(value, alignment) \
(((value) + (alignment) - 1) / (alignment) * (alignment))
void
intel_upload_finish(struct brw_context *brw)
{

View file

@ -336,12 +336,15 @@ static const struct extension extension_table[] = {
{ "GL_OES_texture_half_float", o(OES_texture_half_float), ES2, 2005 },
{ "GL_OES_texture_half_float_linear", o(OES_texture_half_float_linear), ES2, 2005 },
{ "GL_OES_texture_mirrored_repeat", o(dummy_true), ES1, 2005 },
{ "GL_OES_texture_storage_multisample_2d_array",o(ARB_texture_multisample), ES31, 2014 },
{ "GL_OES_texture_npot", o(ARB_texture_non_power_of_two), ES1 | ES2, 2005 },
{ "GL_OES_vertex_array_object", o(dummy_true), ES1 | ES2, 2010 },
/* KHR extensions */
{ "GL_KHR_debug", o(dummy_true), GL, 2012 },
{ "GL_KHR_context_flush_control", o(dummy_true), GL | ES2, 2014 },
{ "GL_KHR_texture_compression_astc_hdr", o(KHR_texture_compression_astc_hdr), GL | ES2, 2012 },
{ "GL_KHR_texture_compression_astc_ldr", o(KHR_texture_compression_astc_ldr), GL | ES2, 2012 },
/* Vendor extensions */
{ "GL_3DFX_texture_compression_FXT1", o(TDFX_texture_compression_FXT1), GL, 1999 },

View file

@ -122,6 +122,9 @@ def get_channel_bits(fmat, chan_name):
elif fmat.layout == 'bptc':
bits = 16 if fmat.name.endswith('_FLOAT') else 8
return bits if fmat.has_channel(chan_name) else 0
elif fmat.layout == 'astc':
bits = 16 if 'RGBA' in fmat.name else 8
return bits if fmat.has_channel(chan_name) else 0
else:
assert False
else:

View file

@ -197,6 +197,7 @@ _mesa_get_format_max_bits(mesa_format format)
* MESA_FORMAT_LAYOUT_ETC1
* MESA_FORMAT_LAYOUT_ETC2
* MESA_FORMAT_LAYOUT_BPTC
* MESA_FORMAT_LAYOUT_ASTC
* MESA_FORMAT_LAYOUT_OTHER
*/
extern enum mesa_format_layout
@ -663,6 +664,48 @@ _mesa_get_srgb_format_linear(mesa_format format)
case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM:
format = MESA_FORMAT_BPTC_RGBA_UNORM;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4:
format = MESA_FORMAT_RGBA_ASTC_4x4;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4:
format = MESA_FORMAT_RGBA_ASTC_5x4;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5:
format = MESA_FORMAT_RGBA_ASTC_5x5;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5:
format = MESA_FORMAT_RGBA_ASTC_6x5;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6:
format = MESA_FORMAT_RGBA_ASTC_6x6;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5:
format = MESA_FORMAT_RGBA_ASTC_8x5;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6:
format = MESA_FORMAT_RGBA_ASTC_8x6;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8:
format = MESA_FORMAT_RGBA_ASTC_8x8;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5:
format = MESA_FORMAT_RGBA_ASTC_10x5;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6:
format = MESA_FORMAT_RGBA_ASTC_10x6;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8:
format = MESA_FORMAT_RGBA_ASTC_10x8;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10:
format = MESA_FORMAT_RGBA_ASTC_10x10;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10:
format = MESA_FORMAT_RGBA_ASTC_12x10;
break;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12:
format = MESA_FORMAT_RGBA_ASTC_12x12;
break;
case MESA_FORMAT_B8G8R8X8_SRGB:
format = MESA_FORMAT_B8G8R8X8_UNORM;
break;

View file

@ -301,3 +301,34 @@ MESA_FORMAT_BPTC_RGBA_UNORM , bptc , 4, 4, x128, , ,
MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM , bptc , 4, 4, x128, , , , xyzw, srgb
MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT , bptc , 4, 4, x128, , , , xyz1, rgb
MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT , bptc , 4, 4, x128, , , , xyz1, rgb
# ASTC compressed formats
MESA_FORMAT_RGBA_ASTC_4x4 , astc , 4, 4, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_5x4 , astc , 5, 4, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_5x5 , astc , 5, 5, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_6x5 , astc , 6, 5, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_6x6 , astc , 6, 6, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_8x5 , astc , 8, 5, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_8x6 , astc , 8, 6, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_8x8 , astc , 8, 8, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_10x5 , astc ,10, 5, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_10x6 , astc ,10, 6, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_10x8 , astc ,10, 8, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_10x10 , astc ,10,10, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_12x10 , astc ,12,10, x128, , , , xyzw, rgb
MESA_FORMAT_RGBA_ASTC_12x12 , astc ,12,12, x128, , , , xyzw, rgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4 , astc , 4, 4, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4 , astc , 5, 4, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5 , astc , 5, 5, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5 , astc , 6, 5, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6 , astc , 6, 6, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5 , astc , 8, 5, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6 , astc , 8, 6, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8 , astc , 8, 8, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5 , astc ,10, 5, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6 , astc ,10, 6, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8 , astc ,10, 8, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10 , astc ,10,10, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10 , astc ,12,10, x128, , , , xyzw, srgb
MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12 , astc ,12,12, x128, , , , xyzw, srgb

Can't render this file because it contains an unexpected character in line 9 and column 3.

View file

@ -70,6 +70,7 @@ enum mesa_format_layout {
MESA_FORMAT_LAYOUT_ETC1,
MESA_FORMAT_LAYOUT_ETC2,
MESA_FORMAT_LAYOUT_BPTC,
MESA_FORMAT_LAYOUT_ASTC,
MESA_FORMAT_LAYOUT_OTHER,
};
@ -586,6 +587,36 @@ typedef enum
MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT,
MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT,
/* ASTC compressed formats */
MESA_FORMAT_RGBA_ASTC_4x4,
MESA_FORMAT_RGBA_ASTC_5x4,
MESA_FORMAT_RGBA_ASTC_5x5,
MESA_FORMAT_RGBA_ASTC_6x5,
MESA_FORMAT_RGBA_ASTC_6x6,
MESA_FORMAT_RGBA_ASTC_8x5,
MESA_FORMAT_RGBA_ASTC_8x6,
MESA_FORMAT_RGBA_ASTC_8x8,
MESA_FORMAT_RGBA_ASTC_10x5,
MESA_FORMAT_RGBA_ASTC_10x6,
MESA_FORMAT_RGBA_ASTC_10x8,
MESA_FORMAT_RGBA_ASTC_10x10,
MESA_FORMAT_RGBA_ASTC_12x10,
MESA_FORMAT_RGBA_ASTC_12x12,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10,
MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12,
MESA_FORMAT_COUNT
} mesa_format;

View file

@ -111,6 +111,7 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx,
if (_mesa_is_enum_format_integer(srcImage->InternalFormat) ||
_mesa_is_depthstencil_format(srcImage->InternalFormat) ||
_mesa_is_astc_format(srcImage->InternalFormat) ||
_mesa_is_stencil_format(srcImage->InternalFormat)) {
_mesa_unlock_texture(ctx, texObj);
_mesa_error(ctx, GL_INVALID_OPERATION,

View file

@ -35,6 +35,7 @@
#include "mtypes.h"
#include "state.h"
#include "texcompress.h"
#include "texstate.h"
#include "framebuffer.h"
#include "samplerobj.h"
#include "stencil.h"
@ -993,16 +994,7 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
{
struct gl_sampler_object *samp =
ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler;
/*
* The sampler object may have been deleted on another context,
* so we try to lookup the sampler object before returning its Name.
*/
if (samp && _mesa_lookup_samplerobj(ctx, samp->Name)) {
v->value_int = samp->Name;
} else {
v->value_int = 0;
}
v->value_int = samp ? samp->Name : 0;
}
break;
/* GL_ARB_uniform_buffer_object */
@ -1750,6 +1742,52 @@ _mesa_GetDoublev(GLenum pname, GLdouble *params)
}
}
/**
 * Convert a GL texture binding enum such as GL_TEXTURE_BINDING_2D
 * into the corresponding Mesa texture target index.
 *
 * Each binding is only legal for the API variants / extensions that
 * actually expose the matching texture target, so every target is
 * gated on the context's capabilities.
 *
 * \return TEXTURE_x_INDEX or -1 if binding is invalid
 */
static int
tex_binding_to_index(const struct gl_context *ctx, GLenum binding)
{
   if (binding == GL_TEXTURE_BINDING_1D)
      return _mesa_is_desktop_gl(ctx) ? TEXTURE_1D_INDEX : -1;

   if (binding == GL_TEXTURE_BINDING_2D)
      return TEXTURE_2D_INDEX;

   if (binding == GL_TEXTURE_BINDING_3D)
      return ctx->API != API_OPENGLES ? TEXTURE_3D_INDEX : -1;

   if (binding == GL_TEXTURE_BINDING_CUBE_MAP)
      return ctx->Extensions.ARB_texture_cube_map ? TEXTURE_CUBE_INDEX : -1;

   if (binding == GL_TEXTURE_BINDING_RECTANGLE)
      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.NV_texture_rectangle
         ? TEXTURE_RECT_INDEX : -1;

   if (binding == GL_TEXTURE_BINDING_1D_ARRAY)
      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array
         ? TEXTURE_1D_ARRAY_INDEX : -1;

   if (binding == GL_TEXTURE_BINDING_2D_ARRAY)
      return (_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array)
         || _mesa_is_gles3(ctx)
         ? TEXTURE_2D_ARRAY_INDEX : -1;

   if (binding == GL_TEXTURE_BINDING_BUFFER)
      return ctx->API == API_OPENGL_CORE &&
             ctx->Extensions.ARB_texture_buffer_object ?
             TEXTURE_BUFFER_INDEX : -1;

   if (binding == GL_TEXTURE_BINDING_CUBE_MAP_ARRAY)
      return _mesa_is_desktop_gl(ctx) &&
             ctx->Extensions.ARB_texture_cube_map_array
         ? TEXTURE_CUBE_ARRAY_INDEX : -1;

   if (binding == GL_TEXTURE_BINDING_2D_MULTISAMPLE)
      return _mesa_is_desktop_gl(ctx) &&
             ctx->Extensions.ARB_texture_multisample
         ? TEXTURE_2D_MULTISAMPLE_INDEX : -1;

   if (binding == GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY)
      return _mesa_is_desktop_gl(ctx) &&
             ctx->Extensions.ARB_texture_multisample
         ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : -1;

   /* Unknown binding enum. */
   return -1;
}
static enum value_type
find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
{
@ -2013,6 +2051,45 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
v->value_int = ctx->ImageUnits[index].Format;
return TYPE_INT;
/* ARB_direct_state_access */
case GL_TEXTURE_BINDING_1D:
case GL_TEXTURE_BINDING_1D_ARRAY:
case GL_TEXTURE_BINDING_2D:
case GL_TEXTURE_BINDING_2D_ARRAY:
case GL_TEXTURE_BINDING_2D_MULTISAMPLE:
case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY:
case GL_TEXTURE_BINDING_3D:
case GL_TEXTURE_BINDING_BUFFER:
case GL_TEXTURE_BINDING_CUBE_MAP:
case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY:
case GL_TEXTURE_BINDING_RECTANGLE: {
int target;
if (ctx->API != API_OPENGL_CORE)
goto invalid_enum;
target = tex_binding_to_index(ctx, pname);
if (target < 0)
goto invalid_enum;
if (index >= _mesa_max_tex_unit(ctx))
goto invalid_value;
v->value_int = ctx->Texture.Unit[index].CurrentTex[target]->Name;
return TYPE_INT;
}
case GL_SAMPLER_BINDING: {
struct gl_sampler_object *samp;
if (ctx->API != API_OPENGL_CORE)
goto invalid_enum;
if (index >= _mesa_max_tex_unit(ctx))
goto invalid_value;
samp = ctx->Texture.Unit[index].Sampler;
v->value_int = samp ? samp->Name : 0;
return TYPE_INT;
}
case GL_MAX_COMPUTE_WORK_GROUP_COUNT:
if (!_mesa_has_compute_shaders(ctx))
goto invalid_enum;

View file

@ -434,6 +434,9 @@ descriptor=[
[ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample" ],
[ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample" ],
# GL_ARB_texture_multisample / ES 3.1 with GL_OES_texture_storage_multisample_2d_array
[ "TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX, extra_ARB_texture_multisample" ],
# GL_ARB_texture_gather / GLES 3.1
[ "MIN_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MinProgramTextureGatherOffset), extra_ARB_texture_gather"],
[ "MAX_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MaxProgramTextureGatherOffset), extra_ARB_texture_gather"],
@ -740,9 +743,6 @@ descriptor=[
[ "TEXTURE_BUFFER_FORMAT_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ],
[ "TEXTURE_BUFFER_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ],
# GL_ARB_texture_multisample / GL 3.2
[ "TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX, extra_ARB_texture_multisample" ],
# GL 3.0
[ "CONTEXT_FLAGS", "CONTEXT_INT(Const.ContextFlags), extra_version_30" ],

View file

@ -820,6 +820,47 @@ _mesa_is_enum_format_signed_int(GLenum format)
}
}
/**
 * Test if the given format is an ASTC format.
 */
GLboolean
_mesa_is_astc_format(GLenum internalFormat)
{
   /* Every ASTC LDR 2D block size this function recognizes, in both
    * the linear (RGBA) and the sRGB variants.
    */
   static const GLenum astc_formats[] = {
      GL_COMPRESSED_RGBA_ASTC_4x4_KHR,
      GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
      GL_COMPRESSED_RGBA_ASTC_5x5_KHR,
      GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
      GL_COMPRESSED_RGBA_ASTC_6x6_KHR,
      GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
      GL_COMPRESSED_RGBA_ASTC_8x6_KHR,
      GL_COMPRESSED_RGBA_ASTC_8x8_KHR,
      GL_COMPRESSED_RGBA_ASTC_10x5_KHR,
      GL_COMPRESSED_RGBA_ASTC_10x6_KHR,
      GL_COMPRESSED_RGBA_ASTC_10x8_KHR,
      GL_COMPRESSED_RGBA_ASTC_10x10_KHR,
      GL_COMPRESSED_RGBA_ASTC_12x10_KHR,
      GL_COMPRESSED_RGBA_ASTC_12x12_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR,
      GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
   };
   unsigned i;

   for (i = 0; i < sizeof(astc_formats) / sizeof(astc_formats[0]); i++) {
      if (internalFormat == astc_formats[i])
         return true;
   }
   return false;
}
/**
* Test if the given format is an integer (non-normalized) format.
@ -1262,6 +1303,35 @@ _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format)
case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
return _mesa_is_desktop_gl(ctx) &&
ctx->Extensions.ARB_texture_compression_bptc;
case GL_COMPRESSED_RGBA_ASTC_4x4_KHR:
case GL_COMPRESSED_RGBA_ASTC_5x4_KHR:
case GL_COMPRESSED_RGBA_ASTC_5x5_KHR:
case GL_COMPRESSED_RGBA_ASTC_6x5_KHR:
case GL_COMPRESSED_RGBA_ASTC_6x6_KHR:
case GL_COMPRESSED_RGBA_ASTC_8x5_KHR:
case GL_COMPRESSED_RGBA_ASTC_8x6_KHR:
case GL_COMPRESSED_RGBA_ASTC_8x8_KHR:
case GL_COMPRESSED_RGBA_ASTC_10x5_KHR:
case GL_COMPRESSED_RGBA_ASTC_10x6_KHR:
case GL_COMPRESSED_RGBA_ASTC_10x8_KHR:
case GL_COMPRESSED_RGBA_ASTC_10x10_KHR:
case GL_COMPRESSED_RGBA_ASTC_12x10_KHR:
case GL_COMPRESSED_RGBA_ASTC_12x12_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR:
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR:
return ctx->Extensions.KHR_texture_compression_astc_ldr;
case GL_PALETTE4_RGB8_OES:
case GL_PALETTE4_RGBA8_OES:
case GL_PALETTE4_R5_G6_B5_OES:

View file

@ -56,6 +56,9 @@ _mesa_bytes_per_pixel( GLenum format, GLenum type );
extern GLint
_mesa_bytes_per_vertex_attrib(GLint comps, GLenum type);
extern GLboolean
_mesa_is_astc_format(GLenum internalFormat);
extern GLboolean
_mesa_is_type_unsigned(GLenum type);

View file

@ -690,7 +690,22 @@ minify(unsigned value, unsigned levels)
*
* \sa ROUND_DOWN_TO()
*/
#define ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))
/**
 * Round \p value up to the next multiple of \p alignment, which must be
 * a positive power of two.  The mask is computed in uintptr_t width so
 * the bitwise complement covers the full pointer-sized range.
 */
static inline uintptr_t
ALIGN(uintptr_t value, int32_t alignment)
{
   const uintptr_t mask = (uintptr_t)alignment - 1;

   assert(alignment > 0);
   assert(_mesa_is_pow_two(alignment));
   return (value + mask) & ~mask;
}
/**
 * Like ALIGN(), but works with a non-power-of-two alignment.
 *
 * Rounds \p value up to the next multiple of \p alignment (any positive
 * value); a value already on a multiple is returned unchanged.
 */
static inline uintptr_t
ALIGN_NPOT(uintptr_t value, int32_t alignment)
{
   uintptr_t remainder;

   assert(alignment > 0);
   remainder = value % (uintptr_t)alignment;
   return remainder ? value + (uintptr_t)alignment - remainder : value;
}
/**
* Align a value down to an alignment value
@ -703,7 +718,12 @@ minify(unsigned value, unsigned levels)
*
* \sa ALIGN()
*/
#define ROUND_DOWN_TO(value, alignment) ((value) & ~(alignment - 1))
/**
 * Round \p value down to the previous multiple of \p alignment, which
 * must be a positive power of two.
 *
 * \sa ALIGN()
 */
static inline uintptr_t
ROUND_DOWN_TO(uintptr_t value, int32_t alignment)
{
   const uintptr_t mask = (uintptr_t)alignment - 1;

   assert(alignment > 0);
   assert(_mesa_is_pow_two(alignment));
   return value & ~mask;
}
/** Cross product of two 3-element vectors */

View file

@ -3751,6 +3751,8 @@ struct gl_extensions
GLboolean ATI_fragment_shader;
GLboolean ATI_separate_stencil;
GLboolean INTEL_performance_query;
GLboolean KHR_texture_compression_astc_hdr;
GLboolean KHR_texture_compression_astc_ldr;
GLboolean MESA_pack_invert;
GLboolean MESA_ycbcr_texture;
GLboolean NV_conditional_render;

View file

@ -2480,5 +2480,8 @@ const struct function gles31_functions_possible[] = {
{ "glVertexAttribBinding", 31, -1 },
{ "glVertexBindingDivisor", 31, -1 },
/* GL_OES_texture_storage_multisample_2d_array */
{ "glTexStorage3DMultisampleOES", 31, -1 },
{ NULL, 0, -1 },
};

View file

@ -229,6 +229,28 @@ _mesa_gl_compressed_format_base_format(GLenum format)
* what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
* GL_COMPRESSED_TEXTURE_FORMATS return."
*
* The KHR_texture_compression_astc_hdr spec says:
*
* "Interactions with OpenGL 4.2
*
* OpenGL 4.2 supports the feature that compressed textures can be
* compressed online, by passing the compressed texture format enum as
* the internal format when uploading a texture using TexImage1D,
* TexImage2D or TexImage3D (see Section 3.9.3, Texture Image
* Specification, subsection Encoding of Special Internal Formats).
*
* Due to the complexity of the ASTC compression algorithm, it is not
* usually suitable for online use, and therefore ASTC support will be
* limited to pre-compressed textures only. Where on-device compression
* is required, a domain-specific limited compressor will typically
* be used, and this is therefore not suitable for implementation in
* the driver.
*
* In particular, the ASTC format specifiers will not be added to
* Table 3.14, and thus will not be accepted by the TexImage*D
* functions, and will not be returned by the (already deprecated)
* COMPRESSED_TEXTURE_FORMATS query."
*
* There is no formal spec for GL_ATI_texture_compression_3dc. Since the
* formats added by this extension are luminance-alpha formats, it is
* reasonable to expect them to follow the same rules as
@ -378,15 +400,15 @@ _mesa_glenum_to_compressed_format(GLenum format)
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
case GL_RGB_S3TC:
case GL_RGB4_S3TC:
return MESA_FORMAT_RGB_DXT1;
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
case GL_RGB4_S3TC:
return MESA_FORMAT_RGBA_DXT1;
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
case GL_RGBA_S3TC:
case GL_RGBA4_S3TC:
return MESA_FORMAT_RGBA_DXT3;
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
case GL_RGBA4_S3TC:
return MESA_FORMAT_RGBA_DXT5;
case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
@ -449,6 +471,63 @@ _mesa_glenum_to_compressed_format(GLenum format)
case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
return MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT;
case GL_COMPRESSED_RGBA_ASTC_4x4_KHR:
return MESA_FORMAT_RGBA_ASTC_4x4;
case GL_COMPRESSED_RGBA_ASTC_5x4_KHR:
return MESA_FORMAT_RGBA_ASTC_5x4;
case GL_COMPRESSED_RGBA_ASTC_5x5_KHR:
return MESA_FORMAT_RGBA_ASTC_5x5;
case GL_COMPRESSED_RGBA_ASTC_6x5_KHR:
return MESA_FORMAT_RGBA_ASTC_6x5;
case GL_COMPRESSED_RGBA_ASTC_6x6_KHR:
return MESA_FORMAT_RGBA_ASTC_6x6;
case GL_COMPRESSED_RGBA_ASTC_8x5_KHR:
return MESA_FORMAT_RGBA_ASTC_8x5;
case GL_COMPRESSED_RGBA_ASTC_8x6_KHR:
return MESA_FORMAT_RGBA_ASTC_8x6;
case GL_COMPRESSED_RGBA_ASTC_8x8_KHR:
return MESA_FORMAT_RGBA_ASTC_8x8;
case GL_COMPRESSED_RGBA_ASTC_10x5_KHR:
return MESA_FORMAT_RGBA_ASTC_10x5;
case GL_COMPRESSED_RGBA_ASTC_10x6_KHR:
return MESA_FORMAT_RGBA_ASTC_10x6;
case GL_COMPRESSED_RGBA_ASTC_10x8_KHR:
return MESA_FORMAT_RGBA_ASTC_10x8;
case GL_COMPRESSED_RGBA_ASTC_10x10_KHR:
return MESA_FORMAT_RGBA_ASTC_10x10;
case GL_COMPRESSED_RGBA_ASTC_12x10_KHR:
return MESA_FORMAT_RGBA_ASTC_12x10;
case GL_COMPRESSED_RGBA_ASTC_12x12_KHR:
return MESA_FORMAT_RGBA_ASTC_12x12;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10;
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR:
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12;
default:
return MESA_FORMAT_NONE;
}
@ -539,6 +618,63 @@ _mesa_compressed_format_to_glenum(struct gl_context *ctx, mesa_format mesaFormat
case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT:
return GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT;
case MESA_FORMAT_RGBA_ASTC_4x4:
return GL_COMPRESSED_RGBA_ASTC_4x4_KHR;
case MESA_FORMAT_RGBA_ASTC_5x4:
return GL_COMPRESSED_RGBA_ASTC_5x4_KHR;
case MESA_FORMAT_RGBA_ASTC_5x5:
return GL_COMPRESSED_RGBA_ASTC_5x5_KHR;
case MESA_FORMAT_RGBA_ASTC_6x5:
return GL_COMPRESSED_RGBA_ASTC_6x5_KHR;
case MESA_FORMAT_RGBA_ASTC_6x6:
return GL_COMPRESSED_RGBA_ASTC_6x6_KHR;
case MESA_FORMAT_RGBA_ASTC_8x5:
return GL_COMPRESSED_RGBA_ASTC_8x5_KHR;
case MESA_FORMAT_RGBA_ASTC_8x6:
return GL_COMPRESSED_RGBA_ASTC_8x6_KHR;
case MESA_FORMAT_RGBA_ASTC_8x8:
return GL_COMPRESSED_RGBA_ASTC_8x8_KHR;
case MESA_FORMAT_RGBA_ASTC_10x5:
return GL_COMPRESSED_RGBA_ASTC_10x5_KHR;
case MESA_FORMAT_RGBA_ASTC_10x6:
return GL_COMPRESSED_RGBA_ASTC_10x6_KHR;
case MESA_FORMAT_RGBA_ASTC_10x8:
return GL_COMPRESSED_RGBA_ASTC_10x8_KHR;
case MESA_FORMAT_RGBA_ASTC_10x10:
return GL_COMPRESSED_RGBA_ASTC_10x10_KHR;
case MESA_FORMAT_RGBA_ASTC_12x10:
return GL_COMPRESSED_RGBA_ASTC_12x10_KHR;
case MESA_FORMAT_RGBA_ASTC_12x12:
return GL_COMPRESSED_RGBA_ASTC_12x12_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR;
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12:
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR;
default:
_mesa_problem(ctx, "Unexpected mesa texture format in"
" _mesa_compressed_format_to_glenum()");

View file

@ -38,6 +38,7 @@
#include "mtypes.h"
#include "texcompress.h"
#include "texformat.h"
#include "glformats.h"
#define RETURN_IF_SUPPORTED(f) do { \
if (ctx->TextureFormatSupported[f]) \
@ -276,87 +277,6 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target,
RETURN_IF_SUPPORTED(MESA_FORMAT_YCBCR_REV);
break;
/* For non-generic compressed format we assert two things:
*
* 1. The format has already been validated against the set of available
* extensions.
*
* 2. The driver only enables the extension if it supports all of the
* formats that are part of that extension.
*/
case GL_COMPRESSED_RGB_FXT1_3DFX:
return MESA_FORMAT_RGB_FXT1;
case GL_COMPRESSED_RGBA_FXT1_3DFX:
return MESA_FORMAT_RGBA_FXT1;
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
case GL_RGB_S3TC:
case GL_RGB4_S3TC:
return MESA_FORMAT_RGB_DXT1;
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
return MESA_FORMAT_RGBA_DXT1;
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
case GL_RGBA_S3TC:
case GL_RGBA4_S3TC:
return MESA_FORMAT_RGBA_DXT3;
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
return MESA_FORMAT_RGBA_DXT5;
case GL_COMPRESSED_RED_RGTC1:
return MESA_FORMAT_R_RGTC1_UNORM;
case GL_COMPRESSED_SIGNED_RED_RGTC1:
return MESA_FORMAT_R_RGTC1_SNORM;
case GL_COMPRESSED_RG_RGTC2:
return MESA_FORMAT_RG_RGTC2_UNORM;
case GL_COMPRESSED_SIGNED_RG_RGTC2:
return MESA_FORMAT_RG_RGTC2_SNORM;
case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
return MESA_FORMAT_L_LATC1_UNORM;
case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
return MESA_FORMAT_L_LATC1_SNORM;
case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
return MESA_FORMAT_LA_LATC2_UNORM;
case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
return MESA_FORMAT_LA_LATC2_SNORM;
case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
return MESA_FORMAT_LA_LATC2_UNORM;
case GL_ETC1_RGB8_OES:
return MESA_FORMAT_ETC1_RGB8;
case GL_COMPRESSED_RGB8_ETC2:
return MESA_FORMAT_ETC2_RGB8;
case GL_COMPRESSED_SRGB8_ETC2:
return MESA_FORMAT_ETC2_SRGB8;
case GL_COMPRESSED_RGBA8_ETC2_EAC:
return MESA_FORMAT_ETC2_RGBA8_EAC;
case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
return MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC;
case GL_COMPRESSED_R11_EAC:
return MESA_FORMAT_ETC2_R11_EAC;
case GL_COMPRESSED_RG11_EAC:
return MESA_FORMAT_ETC2_RG11_EAC;
case GL_COMPRESSED_SIGNED_R11_EAC:
return MESA_FORMAT_ETC2_SIGNED_R11_EAC;
case GL_COMPRESSED_SIGNED_RG11_EAC:
return MESA_FORMAT_ETC2_SIGNED_RG11_EAC;
case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
return MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1;
case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
return MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1;
case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
return MESA_FORMAT_SRGB_DXT1;
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
return MESA_FORMAT_SRGBA_DXT1;
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
return MESA_FORMAT_SRGBA_DXT3;
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
return MESA_FORMAT_SRGBA_DXT5;
case GL_COMPRESSED_RGBA_BPTC_UNORM:
return MESA_FORMAT_BPTC_RGBA_UNORM;
case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
return MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM;
case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
return MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT;
case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
return MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT;
case GL_ALPHA16F_ARB:
RETURN_IF_SUPPORTED(MESA_FORMAT_A_FLOAT16);
RETURN_IF_SUPPORTED(MESA_FORMAT_A_FLOAT32);
@ -844,6 +764,18 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target,
case GL_BGRA:
RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8A8_UNORM);
break;
default:
/* For non-generic compressed format we assert two things:
*
* 1. The format has already been validated against the set of available
* extensions.
*
* 2. The driver only enables the extension if it supports all of the
* formats that are part of that extension.
*/
if (_mesa_is_compressed_format(ctx, internalFormat))
return _mesa_glenum_to_compressed_format(internalFormat);
}
_mesa_problem(ctx, "unexpected format %s in _mesa_choose_tex_format()",

Some files were not shown because too many files have changed in this diff Show more