mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
Merge remote-tracking branch 'mesa-public/master' into vulkan
This commit is contained in:
commit
0af4bf4d4b
106 changed files with 2063 additions and 1572 deletions
|
|
@ -196,7 +196,7 @@ GL 4.5, GLSL 4.50:
|
|||
GL_ARB_get_texture_sub_image DONE (all drivers)
|
||||
GL_ARB_shader_texture_image_samples not started
|
||||
GL_ARB_texture_barrier DONE (nv50, nvc0, r600, radeonsi)
|
||||
GL_KHR_context_flush_control DONE (all - but needs GLX/EXT extension to be useful)
|
||||
GL_KHR_context_flush_control DONE (all - but needs GLX/EGL extension to be useful)
|
||||
GL_KHR_robust_buffer_access_behavior not started
|
||||
GL_KHR_robustness 90% done (the ARB variant)
|
||||
GL_EXT_shader_integer_mix DONE (all drivers that support GLSL)
|
||||
|
|
|
|||
|
|
@ -65,24 +65,24 @@ struct ttn_compile {
|
|||
nir_register *addr_reg;
|
||||
|
||||
/**
|
||||
* Stack of cf_node_lists where instructions should be pushed as we pop
|
||||
* Stack of nir_cursors where instructions should be pushed as we pop
|
||||
* back out of the control flow stack.
|
||||
*
|
||||
* For each IF/ELSE/ENDIF block, if_stack[if_stack_pos - 1] has where the else
|
||||
* instructions should be placed, and if_stack[if_stack_pos - 2] has where
|
||||
* the next instructions outside of the if/then/else block go.
|
||||
*/
|
||||
struct exec_list **if_stack;
|
||||
nir_cursor *if_stack;
|
||||
unsigned if_stack_pos;
|
||||
|
||||
/**
|
||||
* Stack of cf_node_lists where instructions should be pushed as we pop
|
||||
* Stack of nir_cursors where instructions should be pushed as we pop
|
||||
* back out of the control flow stack.
|
||||
*
|
||||
* loop_stack[loop_stack_pos - 1] contains the cursor for the outside
|
||||
* of the loop.
|
||||
*/
|
||||
struct exec_list **loop_stack;
|
||||
nir_cursor *loop_stack;
|
||||
unsigned loop_stack_pos;
|
||||
|
||||
/* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
|
||||
|
|
@ -922,7 +922,7 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
|
|||
nir_builder *b = &c->build;
|
||||
|
||||
/* Save the outside-of-the-if-statement cursor. */
|
||||
c->if_stack[c->if_stack_pos] = b->cf_node_list;
|
||||
c->if_stack[c->if_stack_pos] = b->cursor;
|
||||
c->if_stack_pos++;
|
||||
|
||||
src = ttn_channel(b, src, X);
|
||||
|
|
@ -933,11 +933,11 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
|
|||
} else {
|
||||
if_stmt->condition = nir_src_for_ssa(nir_fne(b, src, nir_imm_int(b, 0)));
|
||||
}
|
||||
nir_cf_node_insert_end(b->cf_node_list, &if_stmt->cf_node);
|
||||
nir_builder_cf_insert(b, &if_stmt->cf_node);
|
||||
|
||||
nir_builder_insert_after_cf_list(b, &if_stmt->then_list);
|
||||
b->cursor = nir_after_cf_list(&if_stmt->then_list);
|
||||
|
||||
c->if_stack[c->if_stack_pos] = &if_stmt->else_list;
|
||||
c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
|
||||
c->if_stack_pos++;
|
||||
}
|
||||
|
||||
|
|
@ -946,7 +946,7 @@ ttn_else(struct ttn_compile *c)
|
|||
{
|
||||
nir_builder *b = &c->build;
|
||||
|
||||
nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos - 1]);
|
||||
b->cursor = c->if_stack[c->if_stack_pos - 1];
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -955,7 +955,7 @@ ttn_endif(struct ttn_compile *c)
|
|||
nir_builder *b = &c->build;
|
||||
|
||||
c->if_stack_pos -= 2;
|
||||
nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos]);
|
||||
b->cursor = c->if_stack[c->if_stack_pos];
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -964,13 +964,13 @@ ttn_bgnloop(struct ttn_compile *c)
|
|||
nir_builder *b = &c->build;
|
||||
|
||||
/* Save the outside-of-the-loop cursor. */
|
||||
c->loop_stack[c->loop_stack_pos] = b->cf_node_list;
|
||||
c->loop_stack[c->loop_stack_pos] = b->cursor;
|
||||
c->loop_stack_pos++;
|
||||
|
||||
nir_loop *loop = nir_loop_create(b->shader);
|
||||
nir_cf_node_insert_end(b->cf_node_list, &loop->cf_node);
|
||||
nir_builder_cf_insert(b, &loop->cf_node);
|
||||
|
||||
nir_builder_insert_after_cf_list(b, &loop->body);
|
||||
b->cursor = nir_after_cf_list(&loop->body);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -993,7 +993,7 @@ ttn_endloop(struct ttn_compile *c)
|
|||
nir_builder *b = &c->build;
|
||||
|
||||
c->loop_stack_pos--;
|
||||
nir_builder_insert_after_cf_list(b, c->loop_stack[c->loop_stack_pos]);
|
||||
b->cursor = c->loop_stack[c->loop_stack_pos];
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1803,7 +1803,7 @@ tgsi_to_nir(const void *tgsi_tokens,
|
|||
nir_function_impl *impl = nir_function_impl_create(overload);
|
||||
|
||||
nir_builder_init(&c->build, impl);
|
||||
nir_builder_insert_after_cf_list(&c->build, &impl->body);
|
||||
c->build.cursor = nir_after_cf_list(&impl->body);
|
||||
|
||||
s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
|
||||
s->num_uniforms = scan.const_file_max[0] + 1;
|
||||
|
|
@ -1819,10 +1819,10 @@ tgsi_to_nir(const void *tgsi_tokens,
|
|||
c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
|
||||
c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);
|
||||
|
||||
c->if_stack = rzalloc_array(c, struct exec_list *,
|
||||
c->if_stack = rzalloc_array(c, nir_cursor,
|
||||
(scan.opcode_count[TGSI_OPCODE_IF] +
|
||||
scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
|
||||
c->loop_stack = rzalloc_array(c, struct exec_list *,
|
||||
c->loop_stack = rzalloc_array(c, nir_cursor,
|
||||
scan.opcode_count[TGSI_OPCODE_BGNLOOP]);
|
||||
|
||||
ret = tgsi_parse_init(&parser, tgsi_tokens);
|
||||
|
|
|
|||
|
|
@ -372,30 +372,28 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter,
|
|||
*
|
||||
* States not listed here are not affected by util_blitter. */
|
||||
|
||||
static inline
|
||||
void util_blitter_save_blend(struct blitter_context *blitter,
|
||||
void *state)
|
||||
static inline void
|
||||
util_blitter_save_blend(struct blitter_context *blitter, void *state)
|
||||
{
|
||||
blitter->saved_blend_state = state;
|
||||
}
|
||||
|
||||
static inline
|
||||
void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
|
||||
void *state)
|
||||
static inline void
|
||||
util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
|
||||
void *state)
|
||||
{
|
||||
blitter->saved_dsa_state = state;
|
||||
}
|
||||
|
||||
static inline
|
||||
void util_blitter_save_vertex_elements(struct blitter_context *blitter,
|
||||
void *state)
|
||||
static inline void
|
||||
util_blitter_save_vertex_elements(struct blitter_context *blitter, void *state)
|
||||
{
|
||||
blitter->saved_velem_state = state;
|
||||
}
|
||||
|
||||
static inline
|
||||
void util_blitter_save_stencil_ref(struct blitter_context *blitter,
|
||||
const struct pipe_stencil_ref *state)
|
||||
static inline void
|
||||
util_blitter_save_stencil_ref(struct blitter_context *blitter,
|
||||
const struct pipe_stencil_ref *state)
|
||||
{
|
||||
blitter->saved_stencil_ref = *state;
|
||||
}
|
||||
|
|
@ -407,23 +405,20 @@ void util_blitter_save_rasterizer(struct blitter_context *blitter,
|
|||
blitter->saved_rs_state = state;
|
||||
}
|
||||
|
||||
static inline
|
||||
void util_blitter_save_fragment_shader(struct blitter_context *blitter,
|
||||
void *fs)
|
||||
static inline void
|
||||
util_blitter_save_fragment_shader(struct blitter_context *blitter, void *fs)
|
||||
{
|
||||
blitter->saved_fs = fs;
|
||||
}
|
||||
|
||||
static inline
|
||||
void util_blitter_save_vertex_shader(struct blitter_context *blitter,
|
||||
void *vs)
|
||||
static inline void
|
||||
util_blitter_save_vertex_shader(struct blitter_context *blitter, void *vs)
|
||||
{
|
||||
blitter->saved_vs = vs;
|
||||
}
|
||||
|
||||
static inline
|
||||
void util_blitter_save_geometry_shader(struct blitter_context *blitter,
|
||||
void *gs)
|
||||
static inline void
|
||||
util_blitter_save_geometry_shader(struct blitter_context *blitter, void *gs)
|
||||
{
|
||||
blitter->saved_gs = gs;
|
||||
}
|
||||
|
|
@ -442,24 +437,24 @@ util_blitter_save_tesseval_shader(struct blitter_context *blitter,
|
|||
blitter->saved_tes = sh;
|
||||
}
|
||||
|
||||
static inline
|
||||
void util_blitter_save_framebuffer(struct blitter_context *blitter,
|
||||
const struct pipe_framebuffer_state *state)
|
||||
static inline void
|
||||
util_blitter_save_framebuffer(struct blitter_context *blitter,
|
||||
const struct pipe_framebuffer_state *state)
|
||||
{
|
||||
blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */
|
||||
util_copy_framebuffer_state(&blitter->saved_fb_state, state);
|
||||
}
|
||||
|
||||
static inline
|
||||
void util_blitter_save_viewport(struct blitter_context *blitter,
|
||||
struct pipe_viewport_state *state)
|
||||
static inline void
|
||||
util_blitter_save_viewport(struct blitter_context *blitter,
|
||||
struct pipe_viewport_state *state)
|
||||
{
|
||||
blitter->saved_viewport = *state;
|
||||
}
|
||||
|
||||
static inline
|
||||
void util_blitter_save_scissor(struct blitter_context *blitter,
|
||||
struct pipe_scissor_state *state)
|
||||
static inline void
|
||||
util_blitter_save_scissor(struct blitter_context *blitter,
|
||||
struct pipe_scissor_state *state)
|
||||
{
|
||||
blitter->saved_scissor = *state;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@
|
|||
#include "util/u_tile.h"
|
||||
#include "util/u_prim.h"
|
||||
#include "util/u_surface.h"
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <limits.h> /* CHAR_BIT */
|
||||
|
|
@ -275,7 +276,7 @@ debug_get_flags_option(const char *name,
|
|||
for (; flags->name; ++flags)
|
||||
namealign = MAX2(namealign, strlen(flags->name));
|
||||
for (flags = orig; flags->name; ++flags)
|
||||
_debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
|
||||
_debug_printf("| %*s [0x%0*"PRIu64"]%s%s\n", namealign, flags->name,
|
||||
(int)sizeof(uint64_t)*CHAR_BIT/4, flags->value,
|
||||
flags->desc ? " " : "", flags->desc ? flags->desc : "");
|
||||
}
|
||||
|
|
@ -290,9 +291,9 @@ debug_get_flags_option(const char *name,
|
|||
|
||||
if (debug_get_option_should_print()) {
|
||||
if (str) {
|
||||
debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
|
||||
debug_printf("%s: %s = 0x%"PRIu64" (%s)\n", __FUNCTION__, name, result, str);
|
||||
} else {
|
||||
debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
|
||||
debug_printf("%s: %s = 0x%"PRIu64"\n", __FUNCTION__, name, result);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -680,6 +680,7 @@ static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460
|
|||
#define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 0x00080000
|
||||
#define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 0x00100000
|
||||
#define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 0x00200000
|
||||
#define A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000
|
||||
#define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD 0x00800000
|
||||
#define A3XX_GRAS_CL_CLIP_CNTL_WCOORD 0x01000000
|
||||
#define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE 0x02000000
|
||||
|
|
|
|||
|
|
@ -563,10 +563,29 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
|
||||
val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
|
||||
A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
|
||||
/* TODO only use if prog doesn't use clipvertex/clipdist */
|
||||
val |= MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6) << 26;
|
||||
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
|
||||
OUT_RING(ring, val);
|
||||
}
|
||||
|
||||
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
|
||||
uint32_t planes = ctx->rasterizer->clip_plane_enable;
|
||||
int count = 0;
|
||||
|
||||
while (planes && count < 6) {
|
||||
int i = ffs(planes) - 1;
|
||||
|
||||
planes &= ~(1U << i);
|
||||
fd_wfi(ctx, ring);
|
||||
OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4);
|
||||
OUT_RING(ring, fui(ctx->ucp.ucp[i][0]));
|
||||
OUT_RING(ring, fui(ctx->ucp.ucp[i][1]));
|
||||
OUT_RING(ring, fui(ctx->ucp.ucp[i][2]));
|
||||
OUT_RING(ring, fui(ctx->ucp.ucp[i][3]));
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE: since primitive_restart is not actually part of any
|
||||
* state object, we need to make sure that we always emit
|
||||
* PRIM_VTX_CNTL.. either that or be more clever and detect
|
||||
|
|
|
|||
|
|
@ -65,7 +65,8 @@ fd3_rasterizer_state_create(struct pipe_context *pctx,
|
|||
if (cso->multisample)
|
||||
TODO
|
||||
*/
|
||||
so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */
|
||||
so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER /* ??? */ |
|
||||
COND(cso->clip_halfz, A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z);
|
||||
so->gras_su_point_minmax =
|
||||
A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
|
||||
A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
|
||||
|
|
|
|||
|
|
@ -334,6 +334,7 @@ struct fd_context {
|
|||
FD_DIRTY_INDEXBUF = (1 << 16),
|
||||
FD_DIRTY_SCISSOR = (1 << 17),
|
||||
FD_DIRTY_STREAMOUT = (1 << 18),
|
||||
FD_DIRTY_UCP = (1 << 19),
|
||||
} dirty;
|
||||
|
||||
struct pipe_blend_state *blend;
|
||||
|
|
@ -355,6 +356,7 @@ struct fd_context {
|
|||
struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
|
||||
struct pipe_index_buffer indexbuf;
|
||||
struct fd_streamout_stateobj streamout;
|
||||
struct pipe_clip_state ucp;
|
||||
|
||||
/* GMEM/tile handling fxns: */
|
||||
void (*emit_tile_init)(struct fd_context *ctx);
|
||||
|
|
|
|||
|
|
@ -191,6 +191,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
return 16383;
|
||||
|
||||
case PIPE_CAP_DEPTH_CLIP_DISABLE:
|
||||
case PIPE_CAP_CLIP_HALFZ:
|
||||
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
|
||||
return is_a3xx(screen);
|
||||
|
||||
|
|
@ -228,7 +229,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
|
||||
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
|
||||
case PIPE_CAP_SAMPLER_VIEW_TARGET:
|
||||
case PIPE_CAP_CLIP_HALFZ:
|
||||
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
|
||||
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
|
||||
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
|
||||
|
|
|
|||
|
|
@ -65,7 +65,9 @@ static void
|
|||
fd_set_clip_state(struct pipe_context *pctx,
|
||||
const struct pipe_clip_state *clip)
|
||||
{
|
||||
DBG("TODO: ");
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
ctx->ucp = *clip;
|
||||
ctx->dirty |= FD_DIRTY_UCP;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -172,7 +172,7 @@ flatten_block(nir_builder *bld, nir_block *if_block, nir_block *prev_block,
|
|||
(intr->intrinsic == nir_intrinsic_discard_if)) {
|
||||
nir_ssa_def *discard_cond;
|
||||
|
||||
nir_builder_insert_after_instr(bld,
|
||||
bld->cursor = nir_after_instr(
|
||||
nir_block_last_instr(prev_block));
|
||||
|
||||
if (invert) {
|
||||
|
|
|
|||
|
|
@ -190,7 +190,7 @@ nv30_context_destroy(struct pipe_context *pipe)
|
|||
} while(0)
|
||||
|
||||
struct pipe_context *
|
||||
nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
|
||||
nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
|
||||
{
|
||||
struct nv30_screen *screen = nv30_screen(pscreen);
|
||||
struct nv30_context *nv30 = CALLOC_STRUCT(nv30_context);
|
||||
|
|
|
|||
|
|
@ -240,7 +240,7 @@ nv50_context_get_sample_position(struct pipe_context *, unsigned, unsigned,
|
|||
float *);
|
||||
|
||||
struct pipe_context *
|
||||
nv50_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
|
||||
nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
|
||||
{
|
||||
struct nv50_screen *screen = nv50_screen(pscreen);
|
||||
struct nv50_context *nv50;
|
||||
|
|
|
|||
|
|
@ -262,7 +262,7 @@ nvc0_context_get_sample_position(struct pipe_context *, unsigned, unsigned,
|
|||
float *);
|
||||
|
||||
struct pipe_context *
|
||||
nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
|
||||
nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0_screen(pscreen);
|
||||
struct nvc0_context *nvc0;
|
||||
|
|
|
|||
|
|
@ -56,10 +56,10 @@ struct nvc0_query {
|
|||
|
||||
#define NVC0_QUERY_ALLOC_SPACE 256
|
||||
|
||||
static boolean nvc0_mp_pm_query_begin(struct nvc0_context *,
|
||||
static boolean nvc0_hw_sm_query_begin(struct nvc0_context *,
|
||||
struct nvc0_query *);
|
||||
static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
|
||||
static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
|
||||
static void nvc0_hw_sm_query_end(struct nvc0_context *, struct nvc0_query *);
|
||||
static boolean nvc0_hw_sm_query_result(struct nvc0_context *,
|
||||
struct nvc0_query *, void *, boolean);
|
||||
|
||||
static inline struct nvc0_query *
|
||||
|
|
@ -159,7 +159,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
|
|||
} else
|
||||
#endif
|
||||
if (nvc0->screen->base.device->drm_version >= 0x01000101) {
|
||||
if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
|
||||
if (type >= NVE4_HW_SM_QUERY(0) && type <= NVE4_HW_SM_QUERY_LAST) {
|
||||
/* for each MP:
|
||||
* [00] = WS0.C0
|
||||
* [04] = WS0.C1
|
||||
|
|
@ -189,7 +189,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
|
|||
space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
|
||||
break;
|
||||
} else
|
||||
if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
|
||||
if (type >= NVC0_HW_SM_QUERY(0) && type <= NVC0_HW_SM_QUERY_LAST) {
|
||||
/* for each MP:
|
||||
* [00] = MP.C0
|
||||
* [04] = MP.C1
|
||||
|
|
@ -327,9 +327,9 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
|
|||
q->u.value = 0;
|
||||
} else
|
||||
#endif
|
||||
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
|
||||
ret = nvc0_mp_pm_query_begin(nvc0, q);
|
||||
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
|
||||
ret = nvc0_hw_sm_query_begin(nvc0, q);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -412,9 +412,9 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
|
|||
return;
|
||||
} else
|
||||
#endif
|
||||
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
|
||||
nvc0_mp_pm_query_end(nvc0, q);
|
||||
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
|
||||
nvc0_hw_sm_query_end(nvc0, q);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -453,9 +453,9 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
|||
return true;
|
||||
} else
|
||||
#endif
|
||||
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
|
||||
return nvc0_mp_pm_query_result(nvc0, q, result, wait);
|
||||
if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
|
||||
(q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
|
||||
return nvc0_hw_sm_query_result(nvc0, q, result, wait);
|
||||
}
|
||||
|
||||
if (q->state != NVC0_QUERY_STATE_READY)
|
||||
|
|
@ -692,7 +692,7 @@ static const char *nvc0_drv_stat_names[] =
|
|||
* We could add a kernel interface for it, but reading the counters like this
|
||||
* has the advantage of being async (if get_result isn't called immediately).
|
||||
*/
|
||||
static const uint64_t nve4_read_mp_pm_counters_code[] =
|
||||
static const uint64_t nve4_read_hw_sm_counters_code[] =
|
||||
{
|
||||
/* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
|
||||
* mov b32 $r8 $tidx
|
||||
|
|
@ -776,6 +776,33 @@ static const uint64_t nve4_read_mp_pm_counters_code[] =
|
|||
static const char *nve4_pm_query_names[] =
|
||||
{
|
||||
/* MP counters */
|
||||
"active_cycles",
|
||||
"active_warps",
|
||||
"atom_count",
|
||||
"branch",
|
||||
"divergent_branch",
|
||||
"gld_request",
|
||||
"global_ld_mem_divergence_replays",
|
||||
"global_store_transaction",
|
||||
"global_st_mem_divergence_replays",
|
||||
"gred_count",
|
||||
"gst_request",
|
||||
"inst_executed",
|
||||
"inst_issued",
|
||||
"inst_issued1",
|
||||
"inst_issued2",
|
||||
"l1_global_load_hit",
|
||||
"l1_global_load_miss",
|
||||
"l1_local_load_hit",
|
||||
"l1_local_load_miss",
|
||||
"l1_local_store_hit",
|
||||
"l1_local_store_miss",
|
||||
"l1_shared_load_transactions",
|
||||
"l1_shared_store_transactions",
|
||||
"local_load",
|
||||
"local_load_transactions",
|
||||
"local_store",
|
||||
"local_store_transactions",
|
||||
"prof_trigger_00",
|
||||
"prof_trigger_01",
|
||||
"prof_trigger_02",
|
||||
|
|
@ -784,41 +811,14 @@ static const char *nve4_pm_query_names[] =
|
|||
"prof_trigger_05",
|
||||
"prof_trigger_06",
|
||||
"prof_trigger_07",
|
||||
"warps_launched",
|
||||
"threads_launched",
|
||||
"sm_cta_launched",
|
||||
"inst_issued1",
|
||||
"inst_issued2",
|
||||
"inst_executed",
|
||||
"local_load",
|
||||
"local_store",
|
||||
"shared_load",
|
||||
"shared_store",
|
||||
"l1_local_load_hit",
|
||||
"l1_local_load_miss",
|
||||
"l1_local_store_hit",
|
||||
"l1_local_store_miss",
|
||||
"gld_request",
|
||||
"gst_request",
|
||||
"l1_global_load_hit",
|
||||
"l1_global_load_miss",
|
||||
"uncached_global_load_transaction",
|
||||
"global_store_transaction",
|
||||
"branch",
|
||||
"divergent_branch",
|
||||
"active_warps",
|
||||
"active_cycles",
|
||||
"inst_issued",
|
||||
"atom_count",
|
||||
"gred_count",
|
||||
"shared_load_replay",
|
||||
"shared_store",
|
||||
"shared_store_replay",
|
||||
"local_load_transactions",
|
||||
"local_store_transactions",
|
||||
"l1_shared_load_transactions",
|
||||
"l1_shared_store_transactions",
|
||||
"global_ld_mem_divergence_replays",
|
||||
"global_st_mem_divergence_replays",
|
||||
"sm_cta_launched",
|
||||
"threads_launched",
|
||||
"uncached_global_load_transaction",
|
||||
"warps_launched",
|
||||
/* metrics, i.e. functions of the MP counters */
|
||||
"metric-ipc", /* inst_executed, clock */
|
||||
"metric-ipac", /* inst_executed, active_cycles */
|
||||
|
|
@ -852,7 +852,7 @@ struct nvc0_mp_counter_cfg
|
|||
#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
|
||||
#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0] */
|
||||
|
||||
struct nvc0_mp_pm_query_cfg
|
||||
struct nvc0_hw_sm_query_cfg
|
||||
{
|
||||
struct nvc0_mp_counter_cfg ctr[4];
|
||||
uint8_t num_counters;
|
||||
|
|
@ -860,17 +860,17 @@ struct nvc0_mp_pm_query_cfg
|
|||
uint8_t norm[2]; /* normalization num,denom */
|
||||
};
|
||||
|
||||
#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
|
||||
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
|
||||
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
|
||||
#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
|
||||
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
|
||||
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
|
||||
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
|
||||
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
|
||||
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
|
||||
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
|
||||
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
|
||||
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
|
||||
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
|
||||
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
|
||||
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
|
||||
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
|
||||
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
|
||||
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
|
||||
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
|
||||
|
|
@ -881,8 +881,35 @@ struct nvc0_mp_pm_query_cfg
|
|||
* metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
|
||||
* this is inaccurate !
|
||||
*/
|
||||
static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
|
||||
static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] =
|
||||
{
|
||||
_Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
|
||||
_Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
|
||||
_Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
|
||||
_Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
|
||||
_Q1A(DIVERGENT_BRANCH, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
|
||||
_Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
|
||||
_Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
|
||||
_Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
|
||||
_Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
|
||||
_Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
|
||||
_Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
|
||||
_Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
|
||||
_Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
|
||||
_Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
|
||||
_Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
|
||||
_Q1B(L1_GLD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
|
||||
_Q1B(L1_GLD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
|
||||
_Q1B(L1_LOCAL_LD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
|
||||
_Q1B(L1_LOCAL_LD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
|
||||
_Q1B(L1_LOCAL_ST_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
|
||||
_Q1B(L1_LOCAL_ST_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
|
||||
_Q1B(L1_SHARED_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
|
||||
_Q1B(L1_SHARED_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
|
||||
_Q1A(LOCAL_LD, 0x0001, B6, LDST, 0x00000008, 1, 1),
|
||||
_Q1B(LOCAL_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
|
||||
_Q1A(LOCAL_ST, 0x0001, B6, LDST, 0x0000000c, 1, 1),
|
||||
_Q1B(LOCAL_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
|
||||
_Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
|
||||
_Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
|
||||
_Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
|
||||
|
|
@ -891,41 +918,14 @@ static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
|
|||
_Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
|
||||
_Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
|
||||
_Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
|
||||
_Q1A(LAUNCHED_WARPS, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
|
||||
_Q1A(LAUNCHED_THREADS, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
|
||||
_Q1B(LAUNCHED_CTA, 0x0001, B6, WARP, 0x0000001c, 1, 1),
|
||||
_Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
|
||||
_Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
|
||||
_Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
|
||||
_Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
|
||||
_Q1A(LD_SHARED, 0x0001, B6, LDST, 0x00000000, 1, 1),
|
||||
_Q1A(ST_SHARED, 0x0001, B6, LDST, 0x00000004, 1, 1),
|
||||
_Q1A(LD_LOCAL, 0x0001, B6, LDST, 0x00000008, 1, 1),
|
||||
_Q1A(ST_LOCAL, 0x0001, B6, LDST, 0x0000000c, 1, 1),
|
||||
_Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
|
||||
_Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
|
||||
_Q1B(L1_LOCAL_LOAD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
|
||||
_Q1B(L1_LOCAL_LOAD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
|
||||
_Q1B(L1_LOCAL_STORE_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
|
||||
_Q1B(L1_LOCAL_STORE_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
|
||||
_Q1B(L1_GLOBAL_LOAD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
|
||||
_Q1B(L1_GLOBAL_LOAD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
|
||||
_Q1B(GLD_TRANSACTIONS_UNCACHED, 0x0001, B6, MEM, 0x00000000, 1, 1),
|
||||
_Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
|
||||
_Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
|
||||
_Q1A(BRANCH_DIVERGENT, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
|
||||
_Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
|
||||
_Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
|
||||
_Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
|
||||
_Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
|
||||
_Q1B(LD_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
|
||||
_Q1B(ST_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
|
||||
_Q1B(LD_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
|
||||
_Q1B(ST_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
|
||||
_Q1B(L1_LD_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
|
||||
_Q1B(L1_ST_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
|
||||
_Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
|
||||
_Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
|
||||
_Q1A(SHARED_LD, 0x0001, B6, LDST, 0x00000000, 1, 1),
|
||||
_Q1B(SHARED_LD_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
|
||||
_Q1A(SHARED_ST, 0x0001, B6, LDST, 0x00000004, 1, 1),
|
||||
_Q1B(SHARED_ST_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
|
||||
_Q1B(SM_CTA_LAUNCHED, 0x0001, B6, WARP, 0x0000001c, 1, 1),
|
||||
_Q1A(THREADS_LAUNCHED, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
|
||||
_Q1B(UNCACHED_GLD_TRANSACTIONS, 0x0001, B6, MEM, 0x00000000, 1, 1),
|
||||
_Q1A(WARPS_LAUNCHED, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
|
||||
_M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
|
||||
_M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
|
||||
_M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
|
||||
|
|
@ -940,7 +940,7 @@ static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
|
|||
#undef _M2B
|
||||
|
||||
/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
|
||||
static const uint64_t nvc0_read_mp_pm_counters_code[] =
|
||||
static const uint64_t nvc0_read_hw_sm_counters_code[] =
|
||||
{
|
||||
/* mov b32 $r8 $tidx
|
||||
* mov b32 $r9 $physid
|
||||
|
|
@ -993,29 +993,21 @@ static const uint64_t nvc0_read_mp_pm_counters_code[] =
|
|||
static const char *nvc0_pm_query_names[] =
|
||||
{
|
||||
/* MP counters */
|
||||
"inst_executed",
|
||||
"active_cycles",
|
||||
"active_warps",
|
||||
"atom_count",
|
||||
"branch",
|
||||
"divergent_branch",
|
||||
"active_warps",
|
||||
"active_cycles",
|
||||
"warps_launched",
|
||||
"threads_launched",
|
||||
"shared_load",
|
||||
"shared_store",
|
||||
"local_load",
|
||||
"local_store",
|
||||
"gred_count",
|
||||
"atom_count",
|
||||
"gld_request",
|
||||
"gred_count",
|
||||
"gst_request",
|
||||
"inst_executed",
|
||||
"inst_issued1_0",
|
||||
"inst_issued1_1",
|
||||
"inst_issued2_0",
|
||||
"inst_issued2_1",
|
||||
"thread_inst_executed_0",
|
||||
"thread_inst_executed_1",
|
||||
"thread_inst_executed_2",
|
||||
"thread_inst_executed_3",
|
||||
"local_load",
|
||||
"local_store",
|
||||
"prof_trigger_00",
|
||||
"prof_trigger_01",
|
||||
"prof_trigger_02",
|
||||
|
|
@ -1024,35 +1016,35 @@ static const char *nvc0_pm_query_names[] =
|
|||
"prof_trigger_05",
|
||||
"prof_trigger_06",
|
||||
"prof_trigger_07",
|
||||
"shared_load",
|
||||
"shared_store",
|
||||
"threads_launched",
|
||||
"thread_inst_executed_0",
|
||||
"thread_inst_executed_1",
|
||||
"thread_inst_executed_2",
|
||||
"thread_inst_executed_3",
|
||||
"warps_launched",
|
||||
};
|
||||
|
||||
/* Expands to one designated-initializer entry of a query configuration table.
 * n is token-pasted onto the query-index prefix to select the array slot,
 * and m onto NVC0_COMPUTE_MP_PM_OP_MODE_ to select the op mode.
 * s0..s5 are OR-ed together at successive 8-bit offsets; s4 and s5 get a ULL
 * suffix pasted on so that the shifts by 32 and 40 are done on a 64-bit value.
 * NOTE(review): the roles of f, g and c depend on the field order of the
 * config struct, which is not visible here -- confirm before changing.
 */
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
|
||||
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
|
||||
|
||||
static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
|
||||
static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
|
||||
{
|
||||
_Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
|
||||
_Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(BRANCH_DIVERGENT, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
|
||||
_Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(LAUNCHED_WARPS, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(LAUNCHED_THREADS, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
|
||||
_Q(LD_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(ST_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(LD_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(ST_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
|
||||
_Q(ATOM_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(DIVERGENT_BRANCH, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(GLD_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(GST_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
|
||||
_Q(INST_ISSUED1_0, 0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(INST_ISSUED1_1, 0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(INST_ISSUED2_0, 0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(INST_ISSUED2_1, 0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
|
||||
_Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
|
||||
_Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
|
||||
_Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
|
||||
_Q(LOCAL_LD, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(LOCAL_ST, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(PROF_TRIGGER_0, 0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(PROF_TRIGGER_1, 0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(PROF_TRIGGER_2, 0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
|
|
@ -1061,38 +1053,46 @@ static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
|
|||
_Q(PROF_TRIGGER_5, 0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(PROF_TRIGGER_6, 0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(PROF_TRIGGER_7, 0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(SHARED_LD, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(SHARED_ST, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
_Q(THREADS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
|
||||
_Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
|
||||
_Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
|
||||
_Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
|
||||
_Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
|
||||
_Q(WARPS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
|
||||
};
|
||||
|
||||
#undef _Q
|
||||
|
||||
/* Return the counter configuration entry that belongs to query q.
 *
 * Screens whose 3D class is NVE4_3D_CLASS or newer index the nve4 table with
 * q->type - PIPE_QUERY_DRIVER_SPECIFIC; every other screen indexes the nvc0
 * table with q->type relative to query 0 of the NVC0 range.
 * q->type is not range-checked here.
 *
 * NOTE(review): this span is a rendered diff, so the signature and the two
 * return statements appear twice -- once with the old mp_pm spelling and once
 * with the new hw_sm spelling.
 */
static const struct nvc0_mp_pm_query_cfg *
|
||||
nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
static const struct nvc0_hw_sm_query_cfg *
|
||||
nvc0_hw_sm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
|
||||
if (screen->base.class_3d >= NVE4_3D_CLASS)
|
||||
return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
|
||||
return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
|
||||
return &nve4_hw_sm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
|
||||
return &nvc0_hw_sm_queries[q->type - NVC0_HW_SM_QUERY(0)];
|
||||
}
|
||||
|
||||
boolean
|
||||
nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
nvc0_hw_sm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
|
||||
const struct nvc0_mp_pm_query_cfg *cfg;
|
||||
const struct nvc0_hw_sm_query_cfg *cfg;
|
||||
unsigned i, c;
|
||||
unsigned num_ab[2] = { 0, 0 };
|
||||
|
||||
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
|
||||
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
|
||||
|
||||
/* check if we have enough free counter slots */
|
||||
for (i = 0; i < cfg->num_counters; ++i)
|
||||
num_ab[cfg->ctr[i].sig_dom]++;
|
||||
|
||||
if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
|
||||
screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
|
||||
if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 ||
|
||||
screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) {
|
||||
NOUVEAU_ERR("Not enough free MP counter slots !\n");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1113,14 +1113,14 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
for (i = 0; i < cfg->num_counters; ++i) {
|
||||
const unsigned d = cfg->ctr[i].sig_dom;
|
||||
|
||||
if (!screen->pm.num_mp_pm_active[d]) {
|
||||
if (!screen->pm.num_hw_sm_active[d]) {
|
||||
uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
|
||||
if (screen->pm.num_mp_pm_active[!d])
|
||||
if (screen->pm.num_hw_sm_active[!d])
|
||||
m |= 1 << (7 + (8 * d));
|
||||
BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
|
||||
PUSH_DATA (push, m);
|
||||
}
|
||||
screen->pm.num_mp_pm_active[d]++;
|
||||
screen->pm.num_hw_sm_active[d]++;
|
||||
|
||||
for (c = d * 4; c < (d * 4 + 4); ++c) {
|
||||
if (!screen->pm.mp_counter[c]) {
|
||||
|
|
@ -1163,7 +1163,7 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
}
|
||||
|
||||
static void
|
||||
nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
nvc0_hw_sm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
struct pipe_context *pipe = &nvc0->base.pipe;
|
||||
|
|
@ -1174,9 +1174,9 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
|
||||
const uint grid[3] = { screen->mp_count, 1, 1 };
|
||||
unsigned c;
|
||||
const struct nvc0_mp_pm_query_cfg *cfg;
|
||||
const struct nvc0_hw_sm_query_cfg *cfg;
|
||||
|
||||
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
|
||||
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
|
||||
|
||||
if (unlikely(!screen->pm.prog)) {
|
||||
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
|
||||
|
|
@ -1185,11 +1185,11 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
prog->num_gprs = 14;
|
||||
prog->parm_size = 12;
|
||||
if (is_nve4) {
|
||||
prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
|
||||
prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
|
||||
prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
|
||||
prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
|
||||
} else {
|
||||
prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
|
||||
prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
|
||||
prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
|
||||
prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
|
||||
}
|
||||
screen->pm.prog = prog;
|
||||
}
|
||||
|
|
@ -1207,7 +1207,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
/* release counters for this query */
|
||||
for (c = 0; c < 8; ++c) {
|
||||
if (nvc0_query(screen->pm.mp_counter[c]) == q) {
|
||||
screen->pm.num_mp_pm_active[c / 4]--;
|
||||
screen->pm.num_hw_sm_active[c / 4]--;
|
||||
screen->pm.mp_counter[c] = NULL;
|
||||
}
|
||||
}
|
||||
|
|
@ -1234,7 +1234,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
q = nvc0_query(screen->pm.mp_counter[c]);
|
||||
if (!q)
|
||||
continue;
|
||||
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
|
||||
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
|
||||
for (i = 0; i < cfg->num_counters; ++i) {
|
||||
if (mask & (1 << q->ctr[i]))
|
||||
break;
|
||||
|
|
@ -1250,10 +1250,10 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
|
|||
}
|
||||
|
||||
static inline bool
|
||||
nvc0_mp_pm_query_read_data(uint32_t count[32][4],
|
||||
nvc0_hw_sm_query_read_data(uint32_t count[32][4],
|
||||
struct nvc0_context *nvc0, bool wait,
|
||||
struct nvc0_query *q,
|
||||
const struct nvc0_mp_pm_query_cfg *cfg,
|
||||
const struct nvc0_hw_sm_query_cfg *cfg,
|
||||
unsigned mp_count)
|
||||
{
|
||||
unsigned p, c;
|
||||
|
|
@ -1275,10 +1275,10 @@ nvc0_mp_pm_query_read_data(uint32_t count[32][4],
|
|||
}
|
||||
|
||||
static inline bool
|
||||
nve4_mp_pm_query_read_data(uint32_t count[32][4],
|
||||
nve4_hw_sm_query_read_data(uint32_t count[32][4],
|
||||
struct nvc0_context *nvc0, bool wait,
|
||||
struct nvc0_query *q,
|
||||
const struct nvc0_mp_pm_query_cfg *cfg,
|
||||
const struct nvc0_hw_sm_query_cfg *cfg,
|
||||
unsigned mp_count)
|
||||
{
|
||||
unsigned p, c, d;
|
||||
|
|
@ -1317,22 +1317,22 @@ nve4_mp_pm_query_read_data(uint32_t count[32][4],
|
|||
* NOTE: Interpretation of IPC requires knowledge of MP count.
|
||||
*/
|
||||
static boolean
|
||||
nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
|
||||
nvc0_hw_sm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
|
||||
void *result, boolean wait)
|
||||
{
|
||||
uint32_t count[32][4];
|
||||
uint64_t value = 0;
|
||||
unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
|
||||
unsigned p, c;
|
||||
const struct nvc0_mp_pm_query_cfg *cfg;
|
||||
const struct nvc0_hw_sm_query_cfg *cfg;
|
||||
bool ret;
|
||||
|
||||
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
|
||||
cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
|
||||
|
||||
if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
|
||||
ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
|
||||
ret = nve4_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
|
||||
else
|
||||
ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
|
||||
ret = nvc0_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
|
||||
if (!ret)
|
||||
return false;
|
||||
|
||||
|
|
@ -1410,11 +1410,11 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
|
|||
if (screen->base.device->drm_version >= 0x01000101) {
|
||||
if (screen->compute) {
|
||||
if (screen->base.class_3d == NVE4_3D_CLASS) {
|
||||
count += NVE4_PM_QUERY_COUNT;
|
||||
count += NVE4_HW_SM_QUERY_COUNT;
|
||||
} else
|
||||
if (screen->base.class_3d < NVE4_3D_CLASS) {
|
||||
/* NVC0_COMPUTE is not always enabled */
|
||||
count += NVC0_PM_QUERY_COUNT;
|
||||
count += NVC0_HW_SM_QUERY_COUNT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1444,15 +1444,15 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
|
|||
if (screen->compute) {
|
||||
if (screen->base.class_3d == NVE4_3D_CLASS) {
|
||||
info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
|
||||
info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
|
||||
info->query_type = NVE4_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
|
||||
info->max_value.u64 =
|
||||
(id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
|
||||
(id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
|
||||
info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
|
||||
return 1;
|
||||
} else
|
||||
if (screen->base.class_3d < NVE4_3D_CLASS) {
|
||||
info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
|
||||
info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
|
||||
info->query_type = NVC0_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
|
||||
info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
|
||||
return 1;
|
||||
}
|
||||
|
|
@ -1494,7 +1494,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
|
|||
info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
|
||||
|
||||
if (screen->base.class_3d == NVE4_3D_CLASS) {
|
||||
info->num_queries = NVE4_PM_QUERY_COUNT;
|
||||
info->num_queries = NVE4_HW_SM_QUERY_COUNT;
|
||||
|
||||
/* On NVE4+, each multiprocessor has 8 hardware counters separated
|
||||
* in two distinct domains, but we allow only one active query
|
||||
|
|
@ -1504,7 +1504,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
|
|||
return 1;
|
||||
} else
|
||||
if (screen->base.class_3d < NVE4_3D_CLASS) {
|
||||
info->num_queries = NVC0_PM_QUERY_COUNT;
|
||||
info->num_queries = NVC0_HW_SM_QUERY_COUNT;
|
||||
|
||||
/* On NVC0:NVE4, each multiprocessor has 8 hardware counters
|
||||
* in a single domain. */
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ struct nvc0_screen {
|
|||
struct {
|
||||
struct nvc0_program *prog; /* compute state object to read MP counters */
|
||||
struct pipe_query *mp_counter[8]; /* counter to query allocation */
|
||||
uint8_t num_mp_pm_active[2];
|
||||
uint8_t num_hw_sm_active[2];
|
||||
bool mp_counters_enabled;
|
||||
} pm;
|
||||
|
||||
|
|
@ -120,156 +120,139 @@ nvc0_screen(struct pipe_screen *screen)
|
|||
|
||||
/* Performance counter queries:
|
||||
*/
|
||||
#define NVE4_PM_QUERY_COUNT 49
|
||||
#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
|
||||
#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
|
||||
#define NVE4_PM_QUERY_PROF_TRIGGER_0 0
|
||||
#define NVE4_PM_QUERY_PROF_TRIGGER_1 1
|
||||
#define NVE4_PM_QUERY_PROF_TRIGGER_2 2
|
||||
#define NVE4_PM_QUERY_PROF_TRIGGER_3 3
|
||||
#define NVE4_PM_QUERY_PROF_TRIGGER_4 4
|
||||
#define NVE4_PM_QUERY_PROF_TRIGGER_5 5
|
||||
#define NVE4_PM_QUERY_PROF_TRIGGER_6 6
|
||||
#define NVE4_PM_QUERY_PROF_TRIGGER_7 7
|
||||
#define NVE4_PM_QUERY_LAUNCHED_WARPS 8
|
||||
#define NVE4_PM_QUERY_LAUNCHED_THREADS 9
|
||||
#define NVE4_PM_QUERY_LAUNCHED_CTA 10
|
||||
#define NVE4_PM_QUERY_INST_ISSUED1 11
|
||||
#define NVE4_PM_QUERY_INST_ISSUED2 12
|
||||
#define NVE4_PM_QUERY_INST_EXECUTED 13
|
||||
#define NVE4_PM_QUERY_LD_LOCAL 14
|
||||
#define NVE4_PM_QUERY_ST_LOCAL 15
|
||||
#define NVE4_PM_QUERY_LD_SHARED 16
|
||||
#define NVE4_PM_QUERY_ST_SHARED 17
|
||||
#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT 18
|
||||
#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS 19
|
||||
#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT 20
|
||||
#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS 21
|
||||
#define NVE4_PM_QUERY_GLD_REQUEST 22
|
||||
#define NVE4_PM_QUERY_GST_REQUEST 23
|
||||
#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT 24
|
||||
#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS 25
|
||||
#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26
|
||||
#define NVE4_PM_QUERY_GST_TRANSACTIONS 27
|
||||
#define NVE4_PM_QUERY_BRANCH 28
|
||||
#define NVE4_PM_QUERY_BRANCH_DIVERGENT 29
|
||||
#define NVE4_PM_QUERY_ACTIVE_WARPS 30
|
||||
#define NVE4_PM_QUERY_ACTIVE_CYCLES 31
|
||||
#define NVE4_PM_QUERY_INST_ISSUED 32
|
||||
#define NVE4_PM_QUERY_ATOM_COUNT 33
|
||||
#define NVE4_PM_QUERY_GRED_COUNT 34
|
||||
#define NVE4_PM_QUERY_LD_SHARED_REPLAY 35
|
||||
#define NVE4_PM_QUERY_ST_SHARED_REPLAY 36
|
||||
#define NVE4_PM_QUERY_LD_LOCAL_TRANSACTIONS 37
|
||||
#define NVE4_PM_QUERY_ST_LOCAL_TRANSACTIONS 38
|
||||
#define NVE4_PM_QUERY_L1_LD_SHARED_TRANSACTIONS 39
|
||||
#define NVE4_PM_QUERY_L1_ST_SHARED_TRANSACTIONS 40
|
||||
#define NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY 41
|
||||
#define NVE4_PM_QUERY_GST_MEM_DIV_REPLAY 42
|
||||
#define NVE4_PM_QUERY_METRIC_IPC 43
|
||||
#define NVE4_PM_QUERY_METRIC_IPAC 44
|
||||
#define NVE4_PM_QUERY_METRIC_IPEC 45
|
||||
#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY 46
|
||||
#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY 47
|
||||
#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD 48
|
||||
#define NVE4_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
|
||||
#define NVE4_HW_SM_QUERY_LAST NVE4_HW_SM_QUERY(NVE4_HW_SM_QUERY_COUNT - 1)
|
||||
/* Indices of the NVE4 hardware SM counter queries, numbered from 0.
 *
 * NVE4_HW_SM_QUERY(i) turns one of these indices into a query type by adding
 * PIPE_QUERY_DRIVER_SPECIFIC.  NVE4_HW_SM_QUERY_COUNT is the final enumerator
 * and therefore equals the number of queries listed before it (49).
 *
 * NOTE(review): the values are positional, so inserting or reordering entries
 * renumbers everything after them; presumably any table indexed by these
 * values has to be kept in step -- verify against the users.
 */
enum nve4_pm_queries
|
||||
{
|
||||
NVE4_HW_SM_QUERY_ACTIVE_CYCLES = 0,
|
||||
NVE4_HW_SM_QUERY_ACTIVE_WARPS,
|
||||
NVE4_HW_SM_QUERY_ATOM_COUNT,
|
||||
NVE4_HW_SM_QUERY_BRANCH,
|
||||
NVE4_HW_SM_QUERY_DIVERGENT_BRANCH,
|
||||
NVE4_HW_SM_QUERY_GLD_REQUEST,
|
||||
NVE4_HW_SM_QUERY_GLD_MEM_DIV_REPLAY,
|
||||
NVE4_HW_SM_QUERY_GST_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_GST_MEM_DIV_REPLAY,
|
||||
NVE4_HW_SM_QUERY_GRED_COUNT,
|
||||
NVE4_HW_SM_QUERY_GST_REQUEST,
|
||||
NVE4_HW_SM_QUERY_INST_EXECUTED,
|
||||
NVE4_HW_SM_QUERY_INST_ISSUED,
|
||||
NVE4_HW_SM_QUERY_INST_ISSUED1,
|
||||
NVE4_HW_SM_QUERY_INST_ISSUED2,
|
||||
NVE4_HW_SM_QUERY_L1_GLD_HIT,
|
||||
NVE4_HW_SM_QUERY_L1_GLD_MISS,
|
||||
NVE4_HW_SM_QUERY_L1_LOCAL_LD_HIT,
|
||||
NVE4_HW_SM_QUERY_L1_LOCAL_LD_MISS,
|
||||
NVE4_HW_SM_QUERY_L1_LOCAL_ST_HIT,
|
||||
NVE4_HW_SM_QUERY_L1_LOCAL_ST_MISS,
|
||||
NVE4_HW_SM_QUERY_L1_SHARED_LD_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_L1_SHARED_ST_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_LOCAL_LD,
|
||||
NVE4_HW_SM_QUERY_LOCAL_LD_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_LOCAL_ST,
|
||||
NVE4_HW_SM_QUERY_LOCAL_ST_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_0,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_1,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_2,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_3,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_4,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_5,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_6,
|
||||
NVE4_HW_SM_QUERY_PROF_TRIGGER_7,
|
||||
NVE4_HW_SM_QUERY_SHARED_LD,
|
||||
NVE4_HW_SM_QUERY_SHARED_LD_REPLAY,
|
||||
NVE4_HW_SM_QUERY_SHARED_ST,
|
||||
NVE4_HW_SM_QUERY_SHARED_ST_REPLAY,
|
||||
NVE4_HW_SM_QUERY_SM_CTA_LAUNCHED,
|
||||
NVE4_HW_SM_QUERY_THREADS_LAUNCHED,
|
||||
NVE4_HW_SM_QUERY_UNCACHED_GLD_TRANSACTIONS,
|
||||
NVE4_HW_SM_QUERY_WARPS_LAUNCHED,
|
||||
/* The METRIC_* entries are grouped after all other queries. */
NVE4_HW_SM_QUERY_METRIC_IPC,
|
||||
NVE4_HW_SM_QUERY_METRIC_IPAC,
|
||||
NVE4_HW_SM_QUERY_METRIC_IPEC,
|
||||
NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY,
|
||||
NVE4_HW_SM_QUERY_METRIC_MP_EFFICIENCY,
|
||||
NVE4_HW_SM_QUERY_METRIC_INST_REPLAY_OHEAD,
|
||||
/* Keep last: its value is the number of entries above. */
NVE4_HW_SM_QUERY_COUNT
|
||||
};
|
||||
|
||||
/*
|
||||
#define NVE4_PM_QUERY_GR_IDLE 50
|
||||
#define NVE4_PM_QUERY_BSP_IDLE 51
|
||||
#define NVE4_PM_QUERY_VP_IDLE 52
|
||||
#define NVE4_PM_QUERY_PPP_IDLE 53
|
||||
#define NVE4_PM_QUERY_CE0_IDLE 54
|
||||
#define NVE4_PM_QUERY_CE1_IDLE 55
|
||||
#define NVE4_PM_QUERY_CE2_IDLE 56
|
||||
*/
|
||||
/* L2 queries (PCOUNTER) */
|
||||
/*
|
||||
#define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57
|
||||
...
|
||||
*/
|
||||
/* TEX queries (PCOUNTER) */
|
||||
/*
|
||||
#define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58
|
||||
...
|
||||
*/
|
||||
|
||||
#define NVC0_PM_QUERY_COUNT 31
|
||||
#define NVC0_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
|
||||
#define NVC0_PM_QUERY_LAST NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1)
|
||||
#define NVC0_PM_QUERY_INST_EXECUTED 0
|
||||
#define NVC0_PM_QUERY_BRANCH 1
|
||||
#define NVC0_PM_QUERY_BRANCH_DIVERGENT 2
|
||||
#define NVC0_PM_QUERY_ACTIVE_WARPS 3
|
||||
#define NVC0_PM_QUERY_ACTIVE_CYCLES 4
|
||||
#define NVC0_PM_QUERY_LAUNCHED_WARPS 5
|
||||
#define NVC0_PM_QUERY_LAUNCHED_THREADS 6
|
||||
#define NVC0_PM_QUERY_LD_SHARED 7
|
||||
#define NVC0_PM_QUERY_ST_SHARED 8
|
||||
#define NVC0_PM_QUERY_LD_LOCAL 9
|
||||
#define NVC0_PM_QUERY_ST_LOCAL 10
|
||||
#define NVC0_PM_QUERY_GRED_COUNT 11
|
||||
#define NVC0_PM_QUERY_ATOM_COUNT 12
|
||||
#define NVC0_PM_QUERY_GLD_REQUEST 13
|
||||
#define NVC0_PM_QUERY_GST_REQUEST 14
|
||||
#define NVC0_PM_QUERY_INST_ISSUED1_0 15
|
||||
#define NVC0_PM_QUERY_INST_ISSUED1_1 16
|
||||
#define NVC0_PM_QUERY_INST_ISSUED2_0 17
|
||||
#define NVC0_PM_QUERY_INST_ISSUED2_1 18
|
||||
#define NVC0_PM_QUERY_TH_INST_EXECUTED_0 19
|
||||
#define NVC0_PM_QUERY_TH_INST_EXECUTED_1 20
|
||||
#define NVC0_PM_QUERY_TH_INST_EXECUTED_2 21
|
||||
#define NVC0_PM_QUERY_TH_INST_EXECUTED_3 22
|
||||
#define NVC0_PM_QUERY_PROF_TRIGGER_0 23
|
||||
#define NVC0_PM_QUERY_PROF_TRIGGER_1 24
|
||||
#define NVC0_PM_QUERY_PROF_TRIGGER_2 25
|
||||
#define NVC0_PM_QUERY_PROF_TRIGGER_3 26
|
||||
#define NVC0_PM_QUERY_PROF_TRIGGER_4 27
|
||||
#define NVC0_PM_QUERY_PROF_TRIGGER_5 28
|
||||
#define NVC0_PM_QUERY_PROF_TRIGGER_6 29
|
||||
#define NVC0_PM_QUERY_PROF_TRIGGER_7 30
|
||||
#define NVC0_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
|
||||
#define NVC0_HW_SM_QUERY_LAST NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_COUNT - 1)
|
||||
/* Indices of the NVC0:NVE4 hardware SM counter queries, numbered from 0.
 *
 * NVC0_HW_SM_QUERY(i) turns one of these indices into a query type by adding
 * PIPE_QUERY_DRIVER_SPECIFIC + 2048.  NVC0_HW_SM_QUERY_COUNT is the final
 * enumerator and therefore equals the number of queries before it (31).
 *
 * NOTE(review): the values are positional, so inserting or reordering entries
 * renumbers everything after them; presumably the query name and config
 * tables have to follow the same order -- verify against the users.
 */
enum nvc0_pm_queries
|
||||
{
|
||||
NVC0_HW_SM_QUERY_ACTIVE_CYCLES = 0,
|
||||
NVC0_HW_SM_QUERY_ACTIVE_WARPS,
|
||||
NVC0_HW_SM_QUERY_ATOM_COUNT,
|
||||
NVC0_HW_SM_QUERY_BRANCH,
|
||||
NVC0_HW_SM_QUERY_DIVERGENT_BRANCH,
|
||||
NVC0_HW_SM_QUERY_GLD_REQUEST,
|
||||
NVC0_HW_SM_QUERY_GRED_COUNT,
|
||||
NVC0_HW_SM_QUERY_GST_REQUEST,
|
||||
NVC0_HW_SM_QUERY_INST_EXECUTED,
|
||||
NVC0_HW_SM_QUERY_INST_ISSUED1_0,
|
||||
NVC0_HW_SM_QUERY_INST_ISSUED1_1,
|
||||
NVC0_HW_SM_QUERY_INST_ISSUED2_0,
|
||||
NVC0_HW_SM_QUERY_INST_ISSUED2_1,
|
||||
NVC0_HW_SM_QUERY_LOCAL_LD,
|
||||
NVC0_HW_SM_QUERY_LOCAL_ST,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_0,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_1,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_2,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_3,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_4,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_5,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_6,
|
||||
NVC0_HW_SM_QUERY_PROF_TRIGGER_7,
|
||||
NVC0_HW_SM_QUERY_SHARED_LD,
|
||||
NVC0_HW_SM_QUERY_SHARED_ST,
|
||||
NVC0_HW_SM_QUERY_THREADS_LAUNCHED,
|
||||
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_0,
|
||||
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_1,
|
||||
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_2,
|
||||
NVC0_HW_SM_QUERY_TH_INST_EXECUTED_3,
|
||||
NVC0_HW_SM_QUERY_WARPS_LAUNCHED,
|
||||
/* Keep last: its value is the number of entries above. */
NVC0_HW_SM_QUERY_COUNT
|
||||
};
|
||||
|
||||
/* Driver statistics queries:
|
||||
*/
|
||||
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
|
||||
|
||||
#define NVC0_QUERY_DRV_STAT(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
|
||||
#define NVC0_QUERY_DRV_STAT_COUNT 29
|
||||
#define NVC0_QUERY_DRV_STAT_LAST NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)
|
||||
#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT 0
|
||||
#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES 1
|
||||
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT 2
|
||||
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID 3
|
||||
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS 4
|
||||
#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ 5
|
||||
#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE 6
|
||||
#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT 7
|
||||
#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT 8
|
||||
#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT 9
|
||||
#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ 10
|
||||
#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE 11
|
||||
#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID 12
|
||||
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT 13
|
||||
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID 14
|
||||
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS 15
|
||||
#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES 16
|
||||
#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17
|
||||
#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18
|
||||
#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT 19
|
||||
#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT 20
|
||||
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY 21
|
||||
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED 22
|
||||
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT 23
|
||||
#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES 24
|
||||
#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT 25
|
||||
#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES 26
|
||||
#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT 27
|
||||
#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT 28
|
||||
|
||||
#else
|
||||
|
||||
#define NVC0_QUERY_DRV_STAT_COUNT 0
|
||||
|
||||
/* Indices of the driver statistics queries, numbered from 0.
 *
 * The individual entries exist only when NOUVEAU_ENABLE_DRIVER_STATISTICS is
 * defined.  NVC0_QUERY_DRV_STAT_COUNT sits outside the conditional, so it is
 * always declared: it is 29 when statistics are enabled and 0 otherwise.
 */
enum nvc0_drv_stats_queries
|
||||
{
|
||||
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
|
||||
NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT = 0,
|
||||
NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES,
|
||||
NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT,
|
||||
NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID,
|
||||
NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS,
|
||||
NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ,
|
||||
NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE,
|
||||
NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT,
|
||||
NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT,
|
||||
NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT,
|
||||
NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ,
|
||||
NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE,
|
||||
NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID,
|
||||
NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT,
|
||||
NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID,
|
||||
NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS,
|
||||
NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES,
|
||||
NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT,
|
||||
NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT,
|
||||
NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT,
|
||||
NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT,
|
||||
NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY,
|
||||
NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED,
|
||||
NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT,
|
||||
NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES,
|
||||
NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT,
|
||||
NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES,
|
||||
NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT,
|
||||
NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT,
|
||||
#endif
|
||||
/* Deliberately outside the #ifdef so the count is declared in both builds. */
NVC0_QUERY_DRV_STAT_COUNT
|
||||
};
|
||||
|
||||
int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
|
||||
struct pipe_driver_query_info *);
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ int64_t compute_memory_prealloc_chunk(
|
|||
|
||||
assert(size_in_dw <= pool->size_in_dw);
|
||||
|
||||
COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
|
||||
COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n",
|
||||
size_in_dw);
|
||||
|
||||
LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
|
||||
|
|
@ -151,7 +151,7 @@ struct list_head *compute_memory_postalloc_chunk(
|
|||
struct compute_memory_item *next;
|
||||
struct list_head *next_link;
|
||||
|
||||
COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunck() start_in_dw = %ld\n",
|
||||
COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunck() start_in_dw = %"PRIi64"\n",
|
||||
start_in_dw);
|
||||
|
||||
/* Check if we can insert it in the front of the list */
|
||||
|
|
@ -568,7 +568,7 @@ void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
|
|||
struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
|
||||
struct pipe_resource *res;
|
||||
|
||||
COMPUTE_DBG(pool->screen, "* compute_memory_free() id + %ld \n", id);
|
||||
COMPUTE_DBG(pool->screen, "* compute_memory_free() id + %"PRIi64" \n", id);
|
||||
|
||||
LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) {
|
||||
|
||||
|
|
@ -628,7 +628,7 @@ struct compute_memory_item* compute_memory_alloc(
|
|||
{
|
||||
struct compute_memory_item *new_item = NULL;
|
||||
|
||||
COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
|
||||
COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
|
||||
size_in_dw, 4 * size_in_dw);
|
||||
|
||||
new_item = (struct compute_memory_item *)
|
||||
|
|
|
|||
|
|
@ -2143,11 +2143,11 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_
|
|||
if (state->geom_enable) {
|
||||
uint32_t cut_val;
|
||||
|
||||
if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 128)
|
||||
if (rctx->gs_shader->gs_max_out_vertices <= 128)
|
||||
cut_val = V_028A40_GS_CUT_128;
|
||||
else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 256)
|
||||
else if (rctx->gs_shader->gs_max_out_vertices <= 256)
|
||||
cut_val = V_028A40_GS_CUT_256;
|
||||
else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 512)
|
||||
else if (rctx->gs_shader->gs_max_out_vertices <= 512)
|
||||
cut_val = V_028A40_GS_CUT_512;
|
||||
else
|
||||
cut_val = V_028A40_GS_CUT_1024;
|
||||
|
|
@ -3013,7 +3013,7 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
|
|||
struct r600_shader *rshader = &shader->shader;
|
||||
struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
|
||||
unsigned gsvs_itemsize =
|
||||
(cp_shader->ring_item_size * rshader->gs_max_out_vertices) >> 2;
|
||||
(cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
|
||||
|
||||
r600_init_command_buffer(cb, 64);
|
||||
|
||||
|
|
@ -3022,14 +3022,14 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
|
|||
r600_store_context_reg(cb, R_028AB8_VGT_VTX_CNT_EN, 1);
|
||||
|
||||
r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT,
|
||||
S_028B38_MAX_VERT_OUT(rshader->gs_max_out_vertices));
|
||||
S_028B38_MAX_VERT_OUT(shader->selector->gs_max_out_vertices));
|
||||
r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE,
|
||||
r600_conv_prim_to_gs_out(rshader->gs_output_prim));
|
||||
r600_conv_prim_to_gs_out(shader->selector->gs_output_prim));
|
||||
|
||||
if (rctx->screen->b.info.drm_minor >= 35) {
|
||||
r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT,
|
||||
S_028B90_CNT(MIN2(rshader->gs_num_invocations, 127)) |
|
||||
S_028B90_ENABLE(rshader->gs_num_invocations > 0));
|
||||
S_028B90_CNT(MIN2(shader->selector->gs_num_invocations, 127)) |
|
||||
S_028B90_ENABLE(shader->selector->gs_num_invocations > 0));
|
||||
}
|
||||
r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
|
||||
r600_store_value(cb, cp_shader->ring_item_size >> 2);
|
||||
|
|
|
|||
|
|
@ -2029,6 +2029,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
|
|||
fprintf(stderr, "CND:%X ", cf->cond);
|
||||
if (cf->pop_count)
|
||||
fprintf(stderr, "POP:%X ", cf->pop_count);
|
||||
if (cf->end_of_program)
|
||||
fprintf(stderr, "EOP ");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,6 +36,8 @@
|
|||
#include "util/list.h"
|
||||
#include "util/u_transfer.h"
|
||||
|
||||
#include "tgsi/tgsi_scan.h"
|
||||
|
||||
#define R600_NUM_ATOMS 75
|
||||
|
||||
#define R600_MAX_VIEWPORTS 16
|
||||
|
|
@ -305,12 +307,18 @@ struct r600_pipe_shader_selector {
|
|||
|
||||
struct tgsi_token *tokens;
|
||||
struct pipe_stream_output_info so;
|
||||
struct tgsi_shader_info info;
|
||||
|
||||
unsigned num_shaders;
|
||||
|
||||
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
|
||||
unsigned type;
|
||||
|
||||
/* geometry shader properties */
|
||||
unsigned gs_output_prim;
|
||||
unsigned gs_max_out_vertices;
|
||||
unsigned gs_num_invocations;
|
||||
|
||||
unsigned nr_ps_max_color_exports;
|
||||
};
|
||||
|
||||
|
|
@ -936,28 +944,5 @@ static inline bool r600_can_read_depth(struct r600_texture *rtex)
|
|||
#define V_028A6C_OUTPRIM_TYPE_LINESTRIP 1
|
||||
#define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2
|
||||
|
||||
/*
 * Translate a primitive mode into one of the three
 * V_028A6C_OUTPRIM_TYPE_* values (POINTLIST, LINESTRIP, TRISTRIP) by
 * positional lookup: entry N of the table is the result for mode == N.
 *
 * NOTE(review): the table order presumably follows the PIPE_PRIM_*
 * numbering -- confirm against the enum, nothing here enforces it.
 *
 * The only bounds check is the assert below.
 */
static inline unsigned r600_conv_prim_to_gs_out(unsigned mode)
|
||||
{
|
||||
static const int prim_conv[] = {
|
||||
V_028A6C_OUTPRIM_TYPE_POINTLIST,
|
||||
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP
|
||||
};
|
||||
/* Reject modes that would index past the end of the table. */
assert(mode < Elements(prim_conv));
|
||||
|
||||
return prim_conv[mode];
|
||||
}
|
||||
|
||||
unsigned r600_conv_prim_to_gs_out(unsigned mode);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1809,7 +1809,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
|||
struct tgsi_token *tokens = pipeshader->selector->tokens;
|
||||
struct pipe_stream_output_info so = pipeshader->selector->so;
|
||||
struct tgsi_full_immediate *immediate;
|
||||
struct tgsi_full_property *property;
|
||||
struct r600_shader_ctx ctx;
|
||||
struct r600_bytecode_output output[32];
|
||||
unsigned output_done, noutput;
|
||||
|
|
@ -1840,7 +1839,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
|||
shader->indirect_files = ctx.info.indirect_files;
|
||||
indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
|
||||
tgsi_parse_init(&ctx.parse, tokens);
|
||||
ctx.type = ctx.parse.FullHeader.Processor.Processor;
|
||||
ctx.type = ctx.info.processor;
|
||||
shader->processor_type = ctx.type;
|
||||
ctx.bc->type = shader->processor_type;
|
||||
|
||||
|
|
@ -1968,6 +1967,12 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
|||
ctx.nliterals = 0;
|
||||
ctx.literals = NULL;
|
||||
shader->fs_write_all = FALSE;
|
||||
if (ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
|
||||
shader->fs_write_all = TRUE;
|
||||
|
||||
shader->vs_position_window_space = FALSE;
|
||||
if (ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION])
|
||||
shader->vs_position_window_space = TRUE;
|
||||
|
||||
if (shader->vs_as_gs_a)
|
||||
vs_add_primid_output(&ctx, key.vs.prim_id_out);
|
||||
|
|
@ -1994,34 +1999,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
|||
goto out_err;
|
||||
break;
|
||||
case TGSI_TOKEN_TYPE_INSTRUCTION:
|
||||
break;
|
||||
case TGSI_TOKEN_TYPE_PROPERTY:
|
||||
property = &ctx.parse.FullToken.FullProperty;
|
||||
switch (property->Property.PropertyName) {
|
||||
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
|
||||
if (property->u[0].Data == 1)
|
||||
shader->fs_write_all = TRUE;
|
||||
break;
|
||||
case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
|
||||
if (property->u[0].Data == 1)
|
||||
shader->vs_position_window_space = TRUE;
|
||||
break;
|
||||
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
|
||||
/* we don't need this one */
|
||||
break;
|
||||
case TGSI_PROPERTY_GS_INPUT_PRIM:
|
||||
shader->gs_input_prim = property->u[0].Data;
|
||||
break;
|
||||
case TGSI_PROPERTY_GS_OUTPUT_PRIM:
|
||||
shader->gs_output_prim = property->u[0].Data;
|
||||
break;
|
||||
case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
|
||||
shader->gs_max_out_vertices = property->u[0].Data;
|
||||
break;
|
||||
case TGSI_PROPERTY_GS_INVOCATIONS:
|
||||
shader->gs_num_invocations = property->u[0].Data;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
|
||||
|
|
|
|||
|
|
@ -78,11 +78,6 @@ struct r600_shader {
|
|||
/* Temporarily workaround SB not handling CF_INDEX_[01] index registers */
|
||||
boolean uses_index_registers;
|
||||
|
||||
/* geometry shader properties */
|
||||
unsigned gs_input_prim;
|
||||
unsigned gs_output_prim;
|
||||
unsigned gs_max_out_vertices;
|
||||
unsigned gs_num_invocations;
|
||||
/* size in bytes of a data item in the ring (single vertex data) */
|
||||
unsigned ring_item_size;
|
||||
|
||||
|
|
|
|||
|
|
@ -1951,11 +1951,11 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom
|
|||
if (state->geom_enable) {
|
||||
uint32_t cut_val;
|
||||
|
||||
if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 128)
|
||||
if (rctx->gs_shader->gs_max_out_vertices <= 128)
|
||||
cut_val = V_028A40_GS_CUT_128;
|
||||
else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 256)
|
||||
else if (rctx->gs_shader->gs_max_out_vertices <= 256)
|
||||
cut_val = V_028A40_GS_CUT_256;
|
||||
else if (rctx->gs_shader->current->shader.gs_max_out_vertices <= 512)
|
||||
else if (rctx->gs_shader->gs_max_out_vertices <= 512)
|
||||
cut_val = V_028A40_GS_CUT_512;
|
||||
else
|
||||
cut_val = V_028A40_GS_CUT_1024;
|
||||
|
|
@ -2650,7 +2650,7 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
|
|||
struct r600_shader *rshader = &shader->shader;
|
||||
struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
|
||||
unsigned gsvs_itemsize =
|
||||
(cp_shader->ring_item_size * rshader->gs_max_out_vertices) >> 2;
|
||||
(cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
|
||||
|
||||
r600_init_command_buffer(cb, 64);
|
||||
|
||||
|
|
@ -2659,10 +2659,10 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
|
|||
|
||||
if (rctx->b.chip_class >= R700) {
|
||||
r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT,
|
||||
S_028B38_MAX_VERT_OUT(rshader->gs_max_out_vertices));
|
||||
S_028B38_MAX_VERT_OUT(shader->selector->gs_max_out_vertices));
|
||||
}
|
||||
r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE,
|
||||
r600_conv_prim_to_gs_out(rshader->gs_output_prim));
|
||||
r600_conv_prim_to_gs_out(shader->selector->gs_output_prim));
|
||||
|
||||
r600_store_context_reg(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE,
|
||||
cp_shader->ring_item_size >> 2);
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@
|
|||
#include "util/u_upload_mgr.h"
|
||||
#include "util/u_math.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
#include "tgsi/tgsi_scan.h"
|
||||
|
||||
void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw)
|
||||
{
|
||||
|
|
@ -123,6 +124,31 @@ static unsigned r600_conv_pipe_prim(unsigned prim)
|
|||
return prim_conv[prim];
|
||||
}
|
||||
|
||||
/*
 * Translate a primitive mode into one of the three
 * V_028A6C_OUTPRIM_TYPE_* values (POINTLIST, LINESTRIP, TRISTRIP).
 *
 * The table is indexed by the PIPE_PRIM_* constants and
 * R600_PRIM_RECTANGLE_LIST through designated initializers, so each
 * mapping is tied to its constant rather than to its position in the list.
 *
 * The only bounds check is the assert below.
 */
unsigned r600_conv_prim_to_gs_out(unsigned mode)
|
||||
{
|
||||
static const int prim_conv[] = {
|
||||
[PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
|
||||
[PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
[PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
[PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
[PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
[PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
|
||||
[R600_PRIM_RECTANGLE_LIST] = V_028A6C_OUTPRIM_TYPE_TRISTRIP
|
||||
};
|
||||
/* Reject modes that would index past the end of the table. */
assert(mode < Elements(prim_conv));
|
||||
|
||||
return prim_conv[mode];
|
||||
}
|
||||
|
||||
/* common state between evergreen and r600 */
|
||||
|
||||
static void r600_bind_blend_state_internal(struct r600_context *rctx,
|
||||
|
|
@ -818,6 +844,19 @@ static void *r600_create_shader_state(struct pipe_context *ctx,
|
|||
sel->type = pipe_shader_type;
|
||||
sel->tokens = tgsi_dup_tokens(state->tokens);
|
||||
sel->so = state->stream_output;
|
||||
tgsi_scan_shader(state->tokens, &sel->info);
|
||||
|
||||
switch (pipe_shader_type) {
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
sel->gs_output_prim =
|
||||
sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
|
||||
sel->gs_max_out_vertices =
|
||||
sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
|
||||
sel->gs_num_invocations =
|
||||
sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
|
||||
break;
|
||||
}
|
||||
|
||||
return sel;
|
||||
}
|
||||
|
||||
|
|
@ -1524,7 +1563,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
|||
unsigned prim = info.mode;
|
||||
|
||||
if (rctx->gs_shader) {
|
||||
prim = rctx->gs_shader->current->shader.gs_output_prim;
|
||||
prim = rctx->gs_shader->gs_output_prim;
|
||||
}
|
||||
prim = r600_conv_prim_to_gs_out(prim); /* decrease the number of types to 3 */
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ int bc_decoder::decode_cf(unsigned &i, bc_cf& bc) {
|
|||
int r = 0;
|
||||
uint32_t dw0 = dw[i];
|
||||
uint32_t dw1 = dw[i+1];
|
||||
assert(i+1 <= ndw);
|
||||
|
||||
if ((dw1 >> 29) & 1) { // CF_ALU
|
||||
return decode_cf_alu(i, bc);
|
||||
|
|
|
|||
|
|
@ -199,6 +199,9 @@ void bc_finalizer::finalize_if(region_node* r) {
|
|||
cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
|
||||
cf_node *if_pop = sh.create_cf(CF_OP_POP);
|
||||
|
||||
if (!last_cf || last_cf->get_parent_region() == r) {
|
||||
last_cf = if_pop;
|
||||
}
|
||||
if_pop->bc.pop_count = 1;
|
||||
if_pop->jump_after(if_pop);
|
||||
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ int bc_parser::decode_shader() {
|
|||
if ((r = decode_cf(i, eop)))
|
||||
return r;
|
||||
|
||||
} while (!eop || (i >> 1) <= max_cf);
|
||||
} while (!eop || (i >> 1) < max_cf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -769,6 +769,7 @@ int bc_parser::prepare_ir() {
|
|||
}
|
||||
|
||||
int bc_parser::prepare_loop(cf_node* c) {
|
||||
assert(c->bc.addr-1 < cf_map.size());
|
||||
|
||||
cf_node *end = cf_map[c->bc.addr - 1];
|
||||
assert(end->bc.op == CF_OP_LOOP_END);
|
||||
|
|
@ -788,8 +789,12 @@ int bc_parser::prepare_loop(cf_node* c) {
|
|||
}
|
||||
|
||||
int bc_parser::prepare_if(cf_node* c) {
|
||||
assert(c->bc.addr-1 < cf_map.size());
|
||||
cf_node *c_else = NULL, *end = cf_map[c->bc.addr];
|
||||
|
||||
if (!end)
|
||||
return 0; // not quite sure how this happens, malformed input?
|
||||
|
||||
BCP_DUMP(
|
||||
sblog << "parsing JUMP @" << c->bc.id;
|
||||
sblog << "\n";
|
||||
|
|
@ -815,7 +820,7 @@ int bc_parser::prepare_if(cf_node* c) {
|
|||
if (c_else->parent != c->parent)
|
||||
c_else = NULL;
|
||||
|
||||
if (end->parent != c->parent)
|
||||
if (end && end->parent != c->parent)
|
||||
end = NULL;
|
||||
|
||||
region_node *reg = sh->create_region();
|
||||
|
|
|
|||
|
|
@ -236,7 +236,7 @@ void rp_gpr_tracker::unreserve(alu_node* n) {
|
|||
|
||||
for (i = 0; i < nsrc; ++i) {
|
||||
value *v = n->src[i];
|
||||
if (v->is_readonly())
|
||||
if (v->is_readonly() || v->is_undef())
|
||||
continue;
|
||||
if (i == 1 && opt)
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
|
|||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
|
|
@ -206,13 +206,13 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
|
|||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
|
||||
radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, 0);
|
||||
break;
|
||||
|
|
@ -220,7 +220,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
|
|||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
|
|
@ -254,7 +254,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
|
|||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
|
|
@ -264,7 +264,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
|
|||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
va += query->buffer.results_end + query->result_size/2;
|
||||
|
|
@ -273,7 +273,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
|
|||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
|
||||
radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, 0);
|
||||
break;
|
||||
|
|
@ -282,7 +282,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
|
|||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
|
|
@ -341,8 +341,8 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
|
|||
|
||||
while (results_base < qbuf->results_end) {
|
||||
radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
|
||||
radeon_emit(cs, (va + results_base) & 0xFFFFFFFFUL);
|
||||
radeon_emit(cs, op | (((va + results_base) >> 32UL) & 0xFF));
|
||||
radeon_emit(cs, va + results_base);
|
||||
radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
|
||||
r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_MIN);
|
||||
results_base += query->result_size;
|
||||
|
|
|
|||
|
|
@ -362,7 +362,7 @@ static void si_launch_grid(
|
|||
shader_va += pc;
|
||||
#endif
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
|
||||
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
|
||||
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1,
|
||||
|
|
|
|||
|
|
@ -426,7 +426,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
|
|||
va = rbuffer->gpu_address + offset;
|
||||
|
||||
/* Fill in T# buffer resource description */
|
||||
desc[0] = va & 0xFFFFFFFF;
|
||||
desc[0] = va;
|
||||
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
|
||||
S_008F04_STRIDE(vb->stride);
|
||||
|
||||
|
|
|
|||
|
|
@ -86,8 +86,8 @@ static void si_dma_copy_buffer(struct si_context *ctx,
|
|||
for (i = 0; i < ncopy; i++) {
|
||||
csize = size < max_csize ? size : max_csize;
|
||||
cs->buf[cs->cdw++] = SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd, csize);
|
||||
cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
|
||||
cs->buf[cs->cdw++] = src_offset & 0xffffffff;
|
||||
cs->buf[cs->cdw++] = dst_offset;
|
||||
cs->buf[cs->cdw++] = src_offset;
|
||||
cs->buf[cs->cdw++] = (dst_offset >> 32UL) & 0xff;
|
||||
cs->buf[cs->cdw++] = (src_offset >> 32UL) & 0xff;
|
||||
dst_offset += csize << shift;
|
||||
|
|
|
|||
|
|
@ -3781,7 +3781,7 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
|
|||
uint64_t scratch_va)
|
||||
{
|
||||
unsigned i;
|
||||
uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
|
||||
uint32_t scratch_rsrc_dword0 = scratch_va;
|
||||
uint32_t scratch_rsrc_dword1 =
|
||||
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
|
||||
| S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
|
||||
|
|
|
|||
|
|
@ -35,10 +35,10 @@
|
|||
#include "util/u_pstipple.h"
|
||||
|
||||
static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
|
||||
void (*emit)(struct si_context *ctx, struct r600_atom *state),
|
||||
void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
|
||||
unsigned num_dw)
|
||||
{
|
||||
atom->emit = (void*)emit;
|
||||
atom->emit = (void*)emit_func;
|
||||
atom->num_dw = num_dw;
|
||||
atom->dirty = false;
|
||||
*list_elem = atom;
|
||||
|
|
|
|||
|
|
@ -409,7 +409,7 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
|
|||
nir_cf_node_get_function(&block->cf_node);
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, impl);
|
||||
nir_builder_insert_before_instr(&b, &intr->instr);
|
||||
b.cursor = nir_before_instr(&intr->instr);
|
||||
vc4_nir_lower_blend_instr(c, &b, intr);
|
||||
}
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ static void
|
|||
vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
|
||||
nir_intrinsic_instr *intr)
|
||||
{
|
||||
nir_builder_insert_before_instr(b, &intr->instr);
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
|
||||
VC4_NIR_TLB_COLOR_READ_INPUT) {
|
||||
|
|
@ -160,7 +160,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
|
|||
/* All TGSI-to-NIR outputs are VEC4. */
|
||||
assert(intr->num_components == 4);
|
||||
|
||||
nir_builder_insert_before_instr(b, &intr->instr);
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
for (unsigned i = 0; i < intr->num_components; i++) {
|
||||
nir_intrinsic_instr *intr_comp =
|
||||
|
|
@ -189,7 +189,7 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
|
|||
return;
|
||||
assert(intr->num_components == 4);
|
||||
|
||||
nir_builder_insert_before_instr(b, &intr->instr);
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
/* Generate scalar loads equivalent to the original VEC4. */
|
||||
nir_ssa_def *dests[4];
|
||||
|
|
|
|||
|
|
@ -101,30 +101,54 @@ static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf)
|
|||
return bo;
|
||||
}
|
||||
|
||||
/*
 * Issue the DRM_RADEON_GEM_BUSY ioctl for the buffer's handle on the
 * winsys file descriptor.
 *
 * Returns true when the ioctl returns anything other than 0.
 * NOTE(review): this treats every non-zero result, including unrelated
 * ioctl failures, as "busy" -- confirm that is the intended policy.
 */
static bool radeon_bo_is_busy(struct radeon_bo *bo)
|
||||
{
|
||||
struct drm_radeon_gem_busy args = {0};
|
||||
|
||||
args.handle = bo->handle;
|
||||
return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
|
||||
&args, sizeof(args)) != 0;
|
||||
}
|
||||
|
||||
/*
 * Issue the DRM_RADEON_GEM_WAIT_IDLE ioctl for the buffer's handle,
 * repeating the call for as long as it returns -EBUSY.
 *
 * Any other return value (success or a different error) ends the loop;
 * the result is not reported to the caller.
 */
static void radeon_bo_wait_idle(struct radeon_bo *bo)
|
||||
{
|
||||
struct drm_radeon_gem_wait_idle args = {0};
|
||||
|
||||
args.handle = bo->handle;
|
||||
/* Empty loop body: the ioctl call in the condition is the whole retry. */
while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
|
||||
&args, sizeof(args)) == -EBUSY);
|
||||
}
|
||||
|
||||
static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
|
||||
enum radeon_bo_usage usage)
|
||||
{
|
||||
struct radeon_bo *bo = get_radeon_bo(_buf);
|
||||
struct radeon_bo *bo = get_radeon_bo(_buf);
|
||||
int64_t abs_timeout;
|
||||
|
||||
/* Wait if any ioctl is being submitted with this buffer. */
|
||||
if (!os_wait_until_zero(&bo->num_active_ioctls, timeout))
|
||||
return false;
|
||||
/* No timeout. Just query. */
|
||||
if (timeout == 0)
|
||||
return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);
|
||||
|
||||
/* TODO: handle arbitrary timeout */
|
||||
if (!timeout) {
|
||||
struct drm_radeon_gem_busy args = {0};
|
||||
abs_timeout = os_time_get_absolute_timeout(timeout);
|
||||
|
||||
args.handle = bo->handle;
|
||||
return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
|
||||
&args, sizeof(args)) == 0;
|
||||
} else {
|
||||
struct drm_radeon_gem_wait_idle args = {0};
|
||||
/* Wait if any ioctl is being submitted with this buffer. */
|
||||
if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
|
||||
return false;
|
||||
|
||||
args.handle = bo->handle;
|
||||
while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
|
||||
&args, sizeof(args)) == -EBUSY);
|
||||
/* Infinite timeout. */
|
||||
if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
|
||||
radeon_bo_wait_idle(bo);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Other timeouts need to be emulated with a loop. */
|
||||
while (radeon_bo_is_busy(bo)) {
|
||||
if (os_time_get_nano() >= abs_timeout)
|
||||
return false;
|
||||
os_time_sleep(10);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
|
||||
|
|
|
|||
|
|
@ -645,29 +645,8 @@ static bool radeon_fence_wait(struct radeon_winsys *ws,
|
|||
struct pipe_fence_handle *fence,
|
||||
uint64_t timeout)
|
||||
{
|
||||
struct pb_buffer *rfence = (struct pb_buffer*)fence;
|
||||
|
||||
if (timeout == 0)
|
||||
return ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE);
|
||||
|
||||
if (timeout != PIPE_TIMEOUT_INFINITE) {
|
||||
int64_t start_time = os_time_get();
|
||||
|
||||
/* Convert to microseconds. */
|
||||
timeout /= 1000;
|
||||
|
||||
/* Wait in a loop. */
|
||||
while (!ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE)) {
|
||||
if (os_time_get() - start_time >= timeout) {
|
||||
return FALSE;
|
||||
}
|
||||
os_time_sleep(10);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
ws->buffer_wait(rfence, PIPE_TIMEOUT_INFINITE, RADEON_USAGE_READWRITE);
|
||||
return TRUE;
|
||||
return ws->buffer_wait((struct pb_buffer*)fence, timeout,
|
||||
RADEON_USAGE_READWRITE);
|
||||
}
|
||||
|
||||
static void radeon_fence_reference(struct pipe_fence_handle **dst,
|
||||
|
|
|
|||
|
|
@ -284,8 +284,9 @@ texture_multisample(const _mesa_glsl_parse_state *state)
|
|||
static bool
|
||||
texture_multisample_array(const _mesa_glsl_parse_state *state)
|
||||
{
|
||||
return state->is_version(150, 0) ||
|
||||
state->ARB_texture_multisample_enable;
|
||||
return state->is_version(150, 320) ||
|
||||
state->ARB_texture_multisample_enable ||
|
||||
state->OES_texture_storage_multisample_2d_array_enable;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -665,10 +666,7 @@ private:
|
|||
B1(any);
|
||||
B1(all);
|
||||
B1(not);
|
||||
B2(textureSize);
|
||||
ir_function_signature *_textureSize(builtin_available_predicate avail,
|
||||
const glsl_type *return_type,
|
||||
const glsl_type *sampler_type);
|
||||
BA2(textureSize);
|
||||
|
||||
/** Flags to _texture() */
|
||||
#define TEX_PROJECT 1
|
||||
|
|
|
|||
|
|
@ -307,7 +307,8 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
|
|||
add_type(symbols, glsl_type::usamplerCubeArray_type);
|
||||
}
|
||||
|
||||
if (state->ARB_texture_multisample_enable) {
|
||||
if (state->ARB_texture_multisample_enable ||
|
||||
state->OES_texture_storage_multisample_2d_array_enable) {
|
||||
add_type(symbols, glsl_type::sampler2DMS_type);
|
||||
add_type(symbols, glsl_type::isampler2DMS_type);
|
||||
add_type(symbols, glsl_type::usampler2DMS_type);
|
||||
|
|
|
|||
|
|
@ -2382,6 +2382,8 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
|
|||
add_builtin_define(parser, "GL_OES_EGL_image_external", 1);
|
||||
if (extensions->OES_standard_derivatives)
|
||||
add_builtin_define(parser, "GL_OES_standard_derivatives", 1);
|
||||
if (extensions->ARB_texture_multisample)
|
||||
add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1);
|
||||
}
|
||||
} else {
|
||||
add_builtin_define(parser, "GL_ARB_draw_buffers", 1);
|
||||
|
|
|
|||
|
|
@ -347,9 +347,9 @@ usampler2DArray KEYWORD(130, 300, 130, 300, USAMPLER2DARRAY);
|
|||
sampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, SAMPLER2DMS);
|
||||
isampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMS);
|
||||
usampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, USAMPLER2DMS);
|
||||
sampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, SAMPLER2DMSARRAY);
|
||||
isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMSARRAY);
|
||||
usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, USAMPLER2DMSARRAY);
|
||||
sampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, SAMPLER2DMSARRAY);
|
||||
isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, ISAMPLER2DMSARRAY);
|
||||
usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, USAMPLER2DMSARRAY);
|
||||
|
||||
/* keywords available with ARB_texture_cube_map_array_enable extension on desktop GLSL */
|
||||
samplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY);
|
||||
|
|
|
|||
|
|
@ -628,6 +628,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
|
|||
EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
|
||||
EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
|
||||
EXT(OES_texture_3D, false, true, EXT_texture3D),
|
||||
EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample),
|
||||
|
||||
/* All other extensions go here, sorted alphabetically.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -548,6 +548,8 @@ struct _mesa_glsl_parse_state {
|
|||
bool OES_standard_derivatives_warn;
|
||||
bool OES_texture_3D_enable;
|
||||
bool OES_texture_3D_warn;
|
||||
bool OES_texture_storage_multisample_2d_array_enable;
|
||||
bool OES_texture_storage_multisample_2d_array_warn;
|
||||
|
||||
/* All other extensions go here, sorted alphabetically.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -620,7 +620,7 @@ struct glsl_type {
|
|||
const glsl_type *field_type(const char *name) const;
|
||||
|
||||
/**
|
||||
* Get the location of a filed within a record type
|
||||
* Get the location of a field within a record type
|
||||
*/
|
||||
int field_index(const char *name) const;
|
||||
|
||||
|
|
|
|||
|
|
@ -566,6 +566,12 @@ csel(operand a, operand b, operand c)
|
|||
return expr(ir_triop_csel, a, b, c);
|
||||
}
|
||||
|
||||
/*
 * Builder shorthand: wrap the three operands in an
 * ir_triop_bitfield_extract expression.
 *
 * NOTE(review): operands are presumably (value, offset, bits), matching
 * GLSL bitfieldExtract() -- confirm against the ir_triop definition.
 */
ir_expression *
|
||||
bitfield_extract(operand a, operand b, operand c)
|
||||
{
|
||||
return expr(ir_triop_bitfield_extract, a, b, c);
|
||||
}
|
||||
|
||||
ir_expression *
|
||||
bitfield_insert(operand a, operand b, operand c, operand d)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -200,6 +200,7 @@ ir_expression *interpolate_at_sample(operand a, operand b);
|
|||
ir_expression *fma(operand a, operand b, operand c);
|
||||
ir_expression *lrp(operand x, operand y, operand a);
|
||||
ir_expression *csel(operand a, operand b, operand c);
|
||||
ir_expression *bitfield_extract(operand a, operand b, operand c);
|
||||
ir_expression *bitfield_insert(operand a, operand b, operand c, operand d);
|
||||
|
||||
ir_swizzle *swizzle(operand a, int swizzle, int components);
|
||||
|
|
|
|||
|
|
@ -66,7 +66,10 @@ enum lower_packing_builtins_op {
|
|||
LOWER_UNPACK_SNORM_4x8 = 0x0200,
|
||||
|
||||
LOWER_PACK_UNORM_4x8 = 0x0400,
|
||||
LOWER_UNPACK_UNORM_4x8 = 0x0800
|
||||
LOWER_UNPACK_UNORM_4x8 = 0x0800,
|
||||
|
||||
LOWER_PACK_USE_BFI = 0x1000,
|
||||
LOWER_PACK_USE_BFE = 0x2000,
|
||||
};
|
||||
|
||||
bool do_common_optimization(exec_list *ir, bool linked,
|
||||
|
|
|
|||
|
|
@ -47,10 +47,9 @@
|
|||
static unsigned
|
||||
values_for_type(const glsl_type *type)
|
||||
{
|
||||
if (type->is_sampler() || type->is_subroutine()) {
|
||||
if (type->is_sampler()) {
|
||||
return 1;
|
||||
} else if (type->is_array() && (type->fields.array->is_sampler() ||
|
||||
type->fields.array->is_subroutine())) {
|
||||
} else if (type->is_array() && type->fields.array->is_sampler()) {
|
||||
return type->array_size();
|
||||
} else {
|
||||
return type->component_slots();
|
||||
|
|
|
|||
|
|
@ -118,6 +118,8 @@ public:
|
|||
*rvalue = split_unpack_half_2x16(op0);
|
||||
break;
|
||||
case LOWER_PACK_UNPACK_NONE:
|
||||
case LOWER_PACK_USE_BFI:
|
||||
case LOWER_PACK_USE_BFE:
|
||||
assert(!"not reached");
|
||||
break;
|
||||
}
|
||||
|
|
@ -222,9 +224,16 @@ private:
|
|||
|
||||
/* uvec2 u = UVEC2_RVAL; */
|
||||
ir_variable *u = factory.make_temp(glsl_type::uvec2_type,
|
||||
"tmp_pack_uvec2_to_uint");
|
||||
"tmp_pack_uvec2_to_uint");
|
||||
factory.emit(assign(u, uvec2_rval));
|
||||
|
||||
if (op_mask & LOWER_PACK_USE_BFI) {
|
||||
return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)),
|
||||
swizzle_y(u),
|
||||
constant(16),
|
||||
constant(16));
|
||||
}
|
||||
|
||||
/* return (u.y << 16) | (u.x & 0xffff); */
|
||||
return bit_or(lshift(swizzle_y(u), constant(16u)),
|
||||
bit_and(swizzle_x(u), constant(0xffffu)));
|
||||
|
|
@ -242,9 +251,22 @@ private:
|
|||
{
|
||||
assert(uvec4_rval->type == glsl_type::uvec4_type);
|
||||
|
||||
/* uvec4 u = UVEC4_RVAL; */
|
||||
ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
|
||||
"tmp_pack_uvec4_to_uint");
|
||||
"tmp_pack_uvec4_to_uint");
|
||||
|
||||
if (op_mask & LOWER_PACK_USE_BFI) {
|
||||
/* uvec4 u = UVEC4_RVAL; */
|
||||
factory.emit(assign(u, uvec4_rval));
|
||||
|
||||
return bitfield_insert(bitfield_insert(
|
||||
bitfield_insert(
|
||||
bit_and(swizzle_x(u), constant(0xffu)),
|
||||
swizzle_y(u), constant(8), constant(8)),
|
||||
swizzle_z(u), constant(16), constant(8)),
|
||||
swizzle_w(u), constant(24), constant(8));
|
||||
}
|
||||
|
||||
/* uvec4 u = UVEC4_RVAL & 0xff */
|
||||
factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));
|
||||
|
||||
/* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */
|
||||
|
|
@ -284,6 +306,39 @@ private:
|
|||
return deref(u2).val;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Unpack a uint32 into two int16's.
|
||||
*
|
||||
* Specifically each 16-bit value is sign-extended to the full width of an
|
||||
* int32 on return.
|
||||
*/
|
||||
ir_rvalue *
|
||||
unpack_uint_to_ivec2(ir_rvalue *uint_rval)
|
||||
{
|
||||
assert(uint_rval->type == glsl_type::uint_type);
|
||||
|
||||
/* Without LOWER_PACK_USE_BFE: split into a uvec2, convert it to int,
 * then shift left and back right by 16 so each 16-bit half ends up
 * sign-extended.
 */
if (!(op_mask & LOWER_PACK_USE_BFE)) {
|
||||
return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)),
|
||||
constant(16u)),
|
||||
constant(16u));
|
||||
}
|
||||
|
||||
/* With LOWER_PACK_USE_BFE: copy the input, converted to int, into a
 * temporary so both extracts below read the same value.
 */
ir_variable *i = factory.make_temp(glsl_type::int_type,
|
||||
"tmp_unpack_uint_to_ivec2_i");
|
||||
factory.emit(assign(i, u2i(uint_rval)));
|
||||
|
||||
/* ivec2 i2; */
|
||||
ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type,
|
||||
"tmp_unpack_uint_to_ivec2_i2");
|
||||
|
||||
/* i2.x and i2.y each come from one bitfield_extract on i.
 * NOTE(review): the constants are presumably (offset, bits), i.e. the
 * low and high 16 bits -- confirm against ir_triop_bitfield_extract.
 */
factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)),
|
||||
WRITEMASK_X));
|
||||
factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)),
|
||||
WRITEMASK_Y));
|
||||
|
||||
return deref(i2).val;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Unpack a uint32 into four uint8's.
|
||||
*
|
||||
|
|
@ -308,13 +363,23 @@ private:
|
|||
/* u4.x = u & 0xffu; */
|
||||
factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));
|
||||
|
||||
/* u4.y = (u >> 8u) & 0xffu; */
|
||||
factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
|
||||
constant(0xffu)), WRITEMASK_Y));
|
||||
if (op_mask & LOWER_PACK_USE_BFE) {
|
||||
/* u4.y = bitfield_extract(u, 8, 8); */
|
||||
factory.emit(assign(u4, bitfield_extract(u, constant(8), constant(8)),
|
||||
WRITEMASK_Y));
|
||||
|
||||
/* u4.z = (u >> 16u) & 0xffu; */
|
||||
factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
|
||||
constant(0xffu)), WRITEMASK_Z));
|
||||
/* u4.z = bitfield_extract(u, 16, 8); */
|
||||
factory.emit(assign(u4, bitfield_extract(u, constant(16), constant(8)),
|
||||
WRITEMASK_Z));
|
||||
} else {
|
||||
/* u4.y = (u >> 8u) & 0xffu; */
|
||||
factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
|
||||
constant(0xffu)), WRITEMASK_Y));
|
||||
|
||||
/* u4.z = (u >> 16u) & 0xffu; */
|
||||
factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
|
||||
constant(0xffu)), WRITEMASK_Z));
|
||||
}
|
||||
|
||||
/* u4.w = (u >> 24u) */
|
||||
factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));
|
||||
|
|
@ -322,6 +387,43 @@ private:
|
|||
return deref(u4).val;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Unpack a uint32 into four int8's.
|
||||
*
|
||||
* Specifically each 8-bit value is sign-extended to the full width of an
|
||||
* int32 on return.
|
||||
*/
|
||||
ir_rvalue *
|
||||
unpack_uint_to_ivec4(ir_rvalue *uint_rval)
|
||||
{
|
||||
assert(uint_rval->type == glsl_type::uint_type);
|
||||
|
||||
if (!(op_mask & LOWER_PACK_USE_BFE)) {
|
||||
return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
|
||||
constant(24u)),
|
||||
constant(24u));
|
||||
}
|
||||
|
||||
ir_variable *i = factory.make_temp(glsl_type::int_type,
|
||||
"tmp_unpack_uint_to_ivec4_i");
|
||||
factory.emit(assign(i, u2i(uint_rval)));
|
||||
|
||||
/* ivec4 i4; */
|
||||
ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type,
|
||||
"tmp_unpack_uint_to_ivec4_i4");
|
||||
|
||||
factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)),
|
||||
WRITEMASK_X));
|
||||
factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)),
|
||||
WRITEMASK_Y));
|
||||
factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)),
|
||||
WRITEMASK_Z));
|
||||
factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)),
|
||||
WRITEMASK_W));
|
||||
|
||||
return deref(i4).val;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Lower a packSnorm2x16 expression.
|
||||
*
|
||||
|
|
@ -468,9 +570,7 @@ private:
|
|||
assert(uint_rval->type == glsl_type::uint_type);
|
||||
|
||||
ir_rvalue *result =
|
||||
clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)),
|
||||
constant(16)),
|
||||
constant(16u))),
|
||||
clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)),
|
||||
constant(32767.0f)),
|
||||
constant(-1.0f),
|
||||
constant(1.0f));
|
||||
|
|
@ -527,9 +627,7 @@ private:
|
|||
assert(uint_rval->type == glsl_type::uint_type);
|
||||
|
||||
ir_rvalue *result =
|
||||
clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
|
||||
constant(24u)),
|
||||
constant(24u))),
|
||||
clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)),
|
||||
constant(127.0f)),
|
||||
constant(-1.0f),
|
||||
constant(1.0f));
|
||||
|
|
|
|||
|
|
@ -664,102 +664,51 @@ add_defs_uses(nir_instr *instr)
|
|||
}
|
||||
|
||||
void
|
||||
nir_instr_insert_before(nir_instr *instr, nir_instr *before)
|
||||
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
|
||||
{
|
||||
assert(before->type != nir_instr_type_jump);
|
||||
before->block = instr->block;
|
||||
add_defs_uses(before);
|
||||
exec_node_insert_node_before(&instr->node, &before->node);
|
||||
}
|
||||
switch (cursor.option) {
|
||||
case nir_cursor_before_block:
|
||||
/* Only allow inserting jumps into empty blocks. */
|
||||
if (instr->type == nir_instr_type_jump)
|
||||
assert(exec_list_is_empty(&cursor.block->instr_list));
|
||||
|
||||
void
|
||||
nir_instr_insert_after(nir_instr *instr, nir_instr *after)
|
||||
{
|
||||
if (after->type == nir_instr_type_jump) {
|
||||
assert(instr == nir_block_last_instr(instr->block));
|
||||
instr->block = cursor.block;
|
||||
add_defs_uses(instr);
|
||||
exec_list_push_head(&cursor.block->instr_list, &instr->node);
|
||||
break;
|
||||
case nir_cursor_after_block: {
|
||||
/* Inserting instructions after a jump is illegal. */
|
||||
nir_instr *last = nir_block_last_instr(cursor.block);
|
||||
assert(last == NULL || last->type != nir_instr_type_jump);
|
||||
(void) last;
|
||||
|
||||
instr->block = cursor.block;
|
||||
add_defs_uses(instr);
|
||||
exec_list_push_tail(&cursor.block->instr_list, &instr->node);
|
||||
break;
|
||||
}
|
||||
case nir_cursor_before_instr:
|
||||
assert(instr->type != nir_instr_type_jump);
|
||||
instr->block = cursor.instr->block;
|
||||
add_defs_uses(instr);
|
||||
exec_node_insert_node_before(&cursor.instr->node, &instr->node);
|
||||
break;
|
||||
case nir_cursor_after_instr:
|
||||
/* Inserting instructions after a jump is illegal. */
|
||||
assert(cursor.instr->type != nir_instr_type_jump);
|
||||
|
||||
/* Only allow inserting jumps at the end of the block. */
|
||||
if (instr->type == nir_instr_type_jump)
|
||||
assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
|
||||
|
||||
instr->block = cursor.instr->block;
|
||||
add_defs_uses(instr);
|
||||
exec_node_insert_after(&cursor.instr->node, &instr->node);
|
||||
break;
|
||||
}
|
||||
|
||||
after->block = instr->block;
|
||||
add_defs_uses(after);
|
||||
exec_node_insert_after(&instr->node, &after->node);
|
||||
|
||||
if (after->type == nir_instr_type_jump)
|
||||
nir_handle_add_jump(after->block);
|
||||
}
|
||||
|
||||
void
|
||||
nir_instr_insert_before_block(nir_block *block, nir_instr *before)
|
||||
{
|
||||
if (before->type == nir_instr_type_jump)
|
||||
assert(exec_list_is_empty(&block->instr_list));
|
||||
|
||||
before->block = block;
|
||||
add_defs_uses(before);
|
||||
exec_list_push_head(&block->instr_list, &before->node);
|
||||
|
||||
if (before->type == nir_instr_type_jump)
|
||||
nir_handle_add_jump(block);
|
||||
}
|
||||
|
||||
void
|
||||
nir_instr_insert_after_block(nir_block *block, nir_instr *after)
|
||||
{
|
||||
if (after->type == nir_instr_type_jump) {
|
||||
assert(exec_list_is_empty(&block->instr_list) ||
|
||||
nir_block_last_instr(block)->type != nir_instr_type_jump);
|
||||
}
|
||||
|
||||
after->block = block;
|
||||
add_defs_uses(after);
|
||||
exec_list_push_tail(&block->instr_list, &after->node);
|
||||
|
||||
if (after->type == nir_instr_type_jump)
|
||||
nir_handle_add_jump(block);
|
||||
}
|
||||
|
||||
void
|
||||
nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
|
||||
{
|
||||
if (node->type == nir_cf_node_block) {
|
||||
nir_instr_insert_before_block(nir_cf_node_as_block(node), before);
|
||||
} else {
|
||||
nir_cf_node *prev = nir_cf_node_prev(node);
|
||||
assert(prev->type == nir_cf_node_block);
|
||||
nir_block *prev_block = nir_cf_node_as_block(prev);
|
||||
|
||||
nir_instr_insert_before_block(prev_block, before);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
|
||||
{
|
||||
if (node->type == nir_cf_node_block) {
|
||||
nir_instr_insert_after_block(nir_cf_node_as_block(node), after);
|
||||
} else {
|
||||
nir_cf_node *next = nir_cf_node_next(node);
|
||||
assert(next->type == nir_cf_node_block);
|
||||
nir_block *next_block = nir_cf_node_as_block(next);
|
||||
|
||||
nir_instr_insert_before_block(next_block, after);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
|
||||
{
|
||||
nir_cf_node *first_node = exec_node_data(nir_cf_node,
|
||||
exec_list_get_head(list), node);
|
||||
nir_instr_insert_before_cf(first_node, before);
|
||||
}
|
||||
|
||||
void
|
||||
nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
|
||||
{
|
||||
nir_cf_node *last_node = exec_node_data(nir_cf_node,
|
||||
exec_list_get_tail(list), node);
|
||||
nir_instr_insert_after_cf(last_node, after);
|
||||
if (instr->type == nir_instr_type_jump)
|
||||
nir_handle_add_jump(instr->block);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
|
|||
|
|
@ -1567,20 +1567,182 @@ nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);
|
|||
nir_load_const_instr *
|
||||
nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref);
|
||||
|
||||
void nir_instr_insert_before(nir_instr *instr, nir_instr *before);
|
||||
void nir_instr_insert_after(nir_instr *instr, nir_instr *after);
|
||||
/**
|
||||
* NIR Cursors and Instruction Insertion API
|
||||
* @{
|
||||
*
|
||||
* A tiny struct representing a point to insert/extract instructions or
|
||||
* control flow nodes. Helps reduce the combinatorial explosion of possible
|
||||
* points to insert/extract.
|
||||
*
|
||||
* \sa nir_control_flow.h
|
||||
*/
|
||||
typedef enum {
|
||||
nir_cursor_before_block,
|
||||
nir_cursor_after_block,
|
||||
nir_cursor_before_instr,
|
||||
nir_cursor_after_instr,
|
||||
} nir_cursor_option;
|
||||
|
||||
void nir_instr_insert_before_block(nir_block *block, nir_instr *before);
|
||||
void nir_instr_insert_after_block(nir_block *block, nir_instr *after);
|
||||
typedef struct {
|
||||
nir_cursor_option option;
|
||||
union {
|
||||
nir_block *block;
|
||||
nir_instr *instr;
|
||||
};
|
||||
} nir_cursor;
|
||||
|
||||
void nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before);
|
||||
void nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after);
|
||||
static inline nir_block *
|
||||
nir_cursor_current_block(nir_cursor cursor)
|
||||
{
|
||||
if (cursor.option == nir_cursor_before_instr ||
|
||||
cursor.option == nir_cursor_after_instr) {
|
||||
return cursor.instr->block;
|
||||
} else {
|
||||
return cursor.block;
|
||||
}
|
||||
}
|
||||
|
||||
void nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before);
|
||||
void nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after);
|
||||
static inline nir_cursor
|
||||
nir_before_block(nir_block *block)
|
||||
{
|
||||
nir_cursor cursor;
|
||||
cursor.option = nir_cursor_before_block;
|
||||
cursor.block = block;
|
||||
return cursor;
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_after_block(nir_block *block)
|
||||
{
|
||||
nir_cursor cursor;
|
||||
cursor.option = nir_cursor_after_block;
|
||||
cursor.block = block;
|
||||
return cursor;
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_before_instr(nir_instr *instr)
|
||||
{
|
||||
nir_cursor cursor;
|
||||
cursor.option = nir_cursor_before_instr;
|
||||
cursor.instr = instr;
|
||||
return cursor;
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_after_instr(nir_instr *instr)
|
||||
{
|
||||
nir_cursor cursor;
|
||||
cursor.option = nir_cursor_after_instr;
|
||||
cursor.instr = instr;
|
||||
return cursor;
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_after_block_before_jump(nir_block *block)
|
||||
{
|
||||
nir_instr *last_instr = nir_block_last_instr(block);
|
||||
if (last_instr && last_instr->type == nir_instr_type_jump) {
|
||||
return nir_before_instr(last_instr);
|
||||
} else {
|
||||
return nir_after_block(block);
|
||||
}
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_before_cf_node(nir_cf_node *node)
|
||||
{
|
||||
if (node->type == nir_cf_node_block)
|
||||
return nir_before_block(nir_cf_node_as_block(node));
|
||||
|
||||
return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_after_cf_node(nir_cf_node *node)
|
||||
{
|
||||
if (node->type == nir_cf_node_block)
|
||||
return nir_after_block(nir_cf_node_as_block(node));
|
||||
|
||||
return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_before_cf_list(struct exec_list *cf_list)
|
||||
{
|
||||
nir_cf_node *first_node = exec_node_data(nir_cf_node,
|
||||
exec_list_get_head(cf_list), node);
|
||||
return nir_before_cf_node(first_node);
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_after_cf_list(struct exec_list *cf_list)
|
||||
{
|
||||
nir_cf_node *last_node = exec_node_data(nir_cf_node,
|
||||
exec_list_get_tail(cf_list), node);
|
||||
return nir_after_cf_node(last_node);
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert a NIR instruction at the given cursor.
|
||||
*
|
||||
* Note: This does not update the cursor.
|
||||
*/
|
||||
void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
|
||||
|
||||
static inline void
|
||||
nir_instr_insert_before(nir_instr *instr, nir_instr *before)
|
||||
{
|
||||
nir_instr_insert(nir_before_instr(instr), before);
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_instr_insert_after(nir_instr *instr, nir_instr *after)
|
||||
{
|
||||
nir_instr_insert(nir_after_instr(instr), after);
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_instr_insert_before_block(nir_block *block, nir_instr *before)
|
||||
{
|
||||
nir_instr_insert(nir_before_block(block), before);
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_instr_insert_after_block(nir_block *block, nir_instr *after)
|
||||
{
|
||||
nir_instr_insert(nir_after_block(block), after);
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
|
||||
{
|
||||
nir_instr_insert(nir_before_cf_node(node), before);
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
|
||||
{
|
||||
nir_instr_insert(nir_after_cf_node(node), after);
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
|
||||
{
|
||||
nir_instr_insert(nir_before_cf_list(list), before);
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
|
||||
{
|
||||
nir_instr_insert(nir_after_cf_list(list), after);
|
||||
}
|
||||
|
||||
void nir_instr_remove(nir_instr *instr);
|
||||
|
||||
/** @} */
|
||||
|
||||
typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
|
||||
typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
|
||||
typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
|
||||
|
|
|
|||
|
|
@ -24,16 +24,12 @@
|
|||
#ifndef NIR_BUILDER_H
|
||||
#define NIR_BUILDER_H
|
||||
|
||||
#include "nir_control_flow.h"
|
||||
|
||||
struct exec_list;
|
||||
|
||||
typedef struct nir_builder {
|
||||
struct exec_list *cf_node_list;
|
||||
|
||||
nir_block *before_block;
|
||||
nir_block *after_block;
|
||||
|
||||
nir_instr *before_instr;
|
||||
nir_instr *after_instr;
|
||||
nir_cursor cursor;
|
||||
|
||||
nir_shader *shader;
|
||||
nir_function_impl *impl;
|
||||
|
|
@ -47,75 +43,20 @@ nir_builder_init(nir_builder *build, nir_function_impl *impl)
|
|||
build->shader = impl->overload->function->shader;
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_builder_insert_after_cf_list(nir_builder *build,
|
||||
struct exec_list *cf_node_list)
|
||||
{
|
||||
build->cf_node_list = cf_node_list;
|
||||
build->before_block = NULL;
|
||||
build->after_block = NULL;
|
||||
build->before_instr = NULL;
|
||||
build->after_instr = NULL;
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_builder_insert_before_block(nir_builder *build,
|
||||
nir_block *block)
|
||||
{
|
||||
build->cf_node_list = NULL;
|
||||
build->before_block = block;
|
||||
build->after_block = NULL;
|
||||
build->before_instr = NULL;
|
||||
build->after_instr = NULL;
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_builder_insert_after_block(nir_builder *build,
|
||||
nir_block *block)
|
||||
{
|
||||
build->cf_node_list = NULL;
|
||||
build->before_block = NULL;
|
||||
build->after_block = block;
|
||||
build->before_instr = NULL;
|
||||
build->after_instr = NULL;
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr)
|
||||
{
|
||||
build->cf_node_list = NULL;
|
||||
build->before_block = NULL;
|
||||
build->after_block = NULL;
|
||||
build->before_instr = before_instr;
|
||||
build->after_instr = NULL;
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr)
|
||||
{
|
||||
build->cf_node_list = NULL;
|
||||
build->before_block = NULL;
|
||||
build->after_block = NULL;
|
||||
build->before_instr = NULL;
|
||||
build->after_instr = after_instr;
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
|
||||
{
|
||||
if (build->cf_node_list) {
|
||||
nir_instr_insert_after_cf_list(build->cf_node_list, instr);
|
||||
} else if (build->before_block) {
|
||||
nir_instr_insert_before_block(build->before_block, instr);
|
||||
} else if (build->after_block) {
|
||||
nir_instr_insert_after_block(build->after_block, instr);
|
||||
} else if (build->before_instr) {
|
||||
nir_instr_insert_before(build->before_instr, instr);
|
||||
} else {
|
||||
assert(build->after_instr);
|
||||
nir_instr_insert_after(build->after_instr, instr);
|
||||
build->after_instr = instr;
|
||||
}
|
||||
nir_instr_insert(build->cursor, instr);
|
||||
|
||||
/* Move the cursor forward. */
|
||||
if (build->cursor.option == nir_cursor_after_instr)
|
||||
build->cursor.instr = instr;
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf)
|
||||
{
|
||||
nir_cf_node_insert(build->cursor, cf);
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
|
|
|
|||
|
|
@ -45,95 +45,6 @@ extern "C" {
|
|||
* deleting them.
|
||||
*/
|
||||
|
||||
/* Helper struct for representing a point to extract/insert. Helps reduce the
|
||||
* combinatorial explosion of possible points to extract.
|
||||
*/
|
||||
|
||||
typedef enum {
|
||||
nir_cursor_before_block,
|
||||
nir_cursor_after_block,
|
||||
nir_cursor_before_instr,
|
||||
nir_cursor_after_instr,
|
||||
} nir_cursor_option;
|
||||
|
||||
typedef struct {
|
||||
nir_cursor_option option;
|
||||
union {
|
||||
nir_block *block;
|
||||
nir_instr *instr;
|
||||
};
|
||||
} nir_cursor;
|
||||
|
||||
static inline nir_cursor
|
||||
nir_before_block(nir_block *block)
|
||||
{
|
||||
nir_cursor cursor;
|
||||
cursor.option = nir_cursor_before_block;
|
||||
cursor.block = block;
|
||||
return cursor;
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_after_block(nir_block *block)
|
||||
{
|
||||
nir_cursor cursor;
|
||||
cursor.option = nir_cursor_after_block;
|
||||
cursor.block = block;
|
||||
return cursor;
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_before_instr(nir_instr *instr)
|
||||
{
|
||||
nir_cursor cursor;
|
||||
cursor.option = nir_cursor_before_instr;
|
||||
cursor.instr = instr;
|
||||
return cursor;
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_after_instr(nir_instr *instr)
|
||||
{
|
||||
nir_cursor cursor;
|
||||
cursor.option = nir_cursor_after_instr;
|
||||
cursor.instr = instr;
|
||||
return cursor;
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_before_cf_node(nir_cf_node *node)
|
||||
{
|
||||
if (node->type == nir_cf_node_block)
|
||||
return nir_before_block(nir_cf_node_as_block(node));
|
||||
|
||||
return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_after_cf_node(nir_cf_node *node)
|
||||
{
|
||||
if (node->type == nir_cf_node_block)
|
||||
return nir_after_block(nir_cf_node_as_block(node));
|
||||
|
||||
return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_before_cf_list(struct exec_list *cf_list)
|
||||
{
|
||||
nir_cf_node *first_node = exec_node_data(nir_cf_node,
|
||||
exec_list_get_head(cf_list), node);
|
||||
return nir_before_cf_node(first_node);
|
||||
}
|
||||
|
||||
static inline nir_cursor
|
||||
nir_after_cf_list(struct exec_list *cf_list)
|
||||
{
|
||||
nir_cf_node *last_node = exec_node_data(nir_cf_node,
|
||||
exec_list_get_tail(cf_list), node);
|
||||
return nir_after_cf_node(last_node);
|
||||
}
|
||||
|
||||
/** Control flow insertion. */
|
||||
|
||||
/** puts a control flow node where the cursor is */
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
|
|||
|
||||
is_signed = (op == nir_op_idiv);
|
||||
|
||||
nir_builder_insert_before_instr(bld, &alu->instr);
|
||||
bld->cursor = nir_before_instr(&alu->instr);
|
||||
|
||||
numer = nir_ssa_for_src(bld, alu->src[0].src,
|
||||
nir_ssa_alu_instr_src_components(alu, 0));
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
|
|||
unsigned base_offset = 0;
|
||||
|
||||
nir_builder *b = &state->builder;
|
||||
nir_builder_insert_before_instr(b, instr);
|
||||
b->cursor = nir_before_instr(instr);
|
||||
|
||||
nir_deref *tail = &deref->deref;
|
||||
while (tail->child != NULL) {
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
|
|||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, nir_cf_node_get_function(&lower->instr.block->cf_node));
|
||||
nir_builder_insert_before_instr(&b, &lower->instr);
|
||||
b.cursor = nir_before_instr(&lower->instr);
|
||||
|
||||
/* Emit the individual loads. */
|
||||
nir_ssa_def *loads[4];
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ nir_lower_tex_projector_block(nir_block *block, void *void_state)
|
|||
continue;
|
||||
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
nir_builder_insert_before_instr(b, &tex->instr);
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
/* Find the projector in the srcs list, if present. */
|
||||
int proj_index;
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ normalize_cubemap_coords_block(nir_block *block, void *void_state)
|
|||
if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
|
||||
continue;
|
||||
|
||||
nir_builder_insert_before_instr(b, &tex->instr);
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||
if (tex->src[i].src_type != nir_tex_src_coord)
|
||||
|
|
|
|||
|
|
@ -2310,7 +2310,7 @@ vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block,
|
|||
}
|
||||
}
|
||||
|
||||
nir_builder_insert_before_block(&b->nb, block);
|
||||
b->nb.cursor = nir_before_block(block);
|
||||
struct vtn_ssa_value *phi = vtn_phi_node_create(b, type);
|
||||
|
||||
struct set_entry *entry2;
|
||||
|
|
@ -2569,10 +2569,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
|
|||
struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block;
|
||||
assert(block->block == NULL);
|
||||
|
||||
struct exec_node *list_tail = exec_list_get_tail(b->nb.cf_node_list);
|
||||
nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node);
|
||||
assert(tail_node->type == nir_cf_node_block);
|
||||
block->block = nir_cf_node_as_block(tail_node);
|
||||
block->block = nir_cursor_current_block(b->nb.cursor);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -2754,17 +2751,15 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
|
|||
vtn_value(b, block->merge_block_id, vtn_value_type_block)->block;
|
||||
|
||||
nir_loop *loop = nir_loop_create(b->shader);
|
||||
nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node);
|
||||
|
||||
struct exec_list *old_list = b->nb.cf_node_list;
|
||||
nir_cf_node_insert(b->nb.cursor, &loop->cf_node);
|
||||
|
||||
/* Reset the merge_op to prevent infinite recursion */
|
||||
block->merge_op = SpvOpNop;
|
||||
|
||||
nir_builder_insert_after_cf_list(&b->nb, &loop->body);
|
||||
b->nb.cursor = nir_after_cf_list(&loop->body);
|
||||
vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL);
|
||||
|
||||
nir_builder_insert_after_cf_list(&b->nb, old_list);
|
||||
b->nb.cursor = nir_after_cf_node(&loop->cf_node);
|
||||
block = new_break_block;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -2776,10 +2771,8 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
|
|||
vtn_foreach_instruction(b, block->label, block->branch,
|
||||
vtn_handle_body_instruction);
|
||||
|
||||
nir_cf_node *cur_cf_node =
|
||||
exec_node_data(nir_cf_node, exec_list_get_tail(b->nb.cf_node_list),
|
||||
node);
|
||||
nir_block *cur_block = nir_cf_node_as_block(cur_cf_node);
|
||||
nir_block *cur_block = nir_cursor_current_block(b->nb.cursor);
|
||||
assert(cur_block == block->block);
|
||||
_mesa_hash_table_insert(b->block_table, cur_block, block);
|
||||
|
||||
switch (branch_op) {
|
||||
|
|
@ -2824,7 +2817,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
|
|||
|
||||
nir_if *if_stmt = nir_if_create(b->shader);
|
||||
if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
|
||||
nir_cf_node_insert_end(b->nb.cf_node_list, &if_stmt->cf_node);
|
||||
nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node);
|
||||
|
||||
if (then_block == break_block) {
|
||||
nir_jump_instr *jump = nir_jump_instr_create(b->shader,
|
||||
|
|
@ -2859,15 +2852,13 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start,
|
|||
struct vtn_block *merge_block =
|
||||
vtn_value(b, block->merge_block_id, vtn_value_type_block)->block;
|
||||
|
||||
struct exec_list *old_list = b->nb.cf_node_list;
|
||||
|
||||
nir_builder_insert_after_cf_list(&b->nb, &if_stmt->then_list);
|
||||
b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
|
||||
vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block);
|
||||
|
||||
nir_builder_insert_after_cf_list(&b->nb, &if_stmt->else_list);
|
||||
b->nb.cursor = nir_after_cf_list(&if_stmt->else_list);
|
||||
vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block);
|
||||
|
||||
nir_builder_insert_after_cf_list(&b->nb, old_list);
|
||||
b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node);
|
||||
block = merge_block;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -2967,7 +2958,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
|
|||
b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
nir_builder_init(&b->nb, b->impl);
|
||||
nir_builder_insert_after_cf_list(&b->nb, &b->impl->body);
|
||||
b->nb.cursor = nir_after_cf_list(&b->impl->body);
|
||||
vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL);
|
||||
vtn_foreach_instruction(b, func->start_block->label, func->end,
|
||||
vtn_handle_phi_second_pass);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Print defined gl.* functions not in GL ES 1.1 or in
|
||||
# (FIXME, none of these should be part of the ABI)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Print defined gl.* functions not in GL ES 3.0 or in
|
||||
# (FIXME, none of these should be part of the ABI)
|
||||
|
|
|
|||
40
src/mapi/glapi/gen/KHR_texture_compression_astc.xml
Normal file
40
src/mapi/glapi/gen/KHR_texture_compression_astc.xml
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
<?xml version="1.0"?>
|
||||
<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
|
||||
|
||||
<OpenGLAPI>
|
||||
|
||||
<category name="GL_KHR_texture_compression_astc_ldr" number="118">
|
||||
|
||||
<enum name="COMPRESSED_RGBA_ASTC_4x4_KHR" value="0x93B0"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_5x4_KHR" value="0x93B1"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_5x5_KHR" value="0x93B2"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_6x5_KHR" value="0x93B3"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_6x6_KHR" value="0x93B4"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_8x5_KHR" value="0x93B5"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_8x6_KHR" value="0x93B6"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_8x8_KHR" value="0x93B7"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_10x5_KHR" value="0x93B8"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_10x6_KHR" value="0x93B9"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_10x8_KHR" value="0x93BA"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_10x10_KHR" value="0x93BB"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_12x10_KHR" value="0x93BC"/>
|
||||
<enum name="COMPRESSED_RGBA_ASTC_12x12_KHR" value="0x93BD"/>
|
||||
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR" value="0x93D0"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR" value="0x93D1"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR" value="0x93D2"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR" value="0x93D3"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR" value="0x93D4"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR" value="0x93D5"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR" value="0x93D6"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR" value="0x93D7"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR" value="0x93D8"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR" value="0x93D9"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR" value="0x93DA"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR" value="0x93DB"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR" value="0x93DC"/>
|
||||
<enum name="COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR" value="0x93DD"/>
|
||||
|
||||
</category>
|
||||
|
||||
</OpenGLAPI>
|
||||
|
|
@ -190,6 +190,7 @@ API_XML = \
|
|||
INTEL_performance_query.xml \
|
||||
KHR_debug.xml \
|
||||
KHR_context_flush_control.xml \
|
||||
KHR_texture_compression_astc.xml \
|
||||
NV_conditional_render.xml \
|
||||
NV_primitive_restart.xml \
|
||||
NV_texture_barrier.xml \
|
||||
|
|
|
|||
|
|
@ -798,4 +798,23 @@
|
|||
</function>
|
||||
</category>
|
||||
|
||||
<!-- 174. GL_OES_texture_storage_multisample_2d_array -->
|
||||
<category name="GL_OES_texture_storage_multisample_2d_array" number="174">
|
||||
<enum name="TEXTURE_2D_MULTISAMPLE_ARRAY_OES" value="0x9102"/>
|
||||
<enum name="TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY_OES" value="0x9105"/>
|
||||
<enum name="SAMPLER_2D_MULTISAMPLE_ARRAY_OES" value="0x910B"/>
|
||||
<enum name="INT_SAMPLER_2D_MULTISAMPLE_ARRAY_OES" value="0x910C"/>
|
||||
<enum name="UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY_OES" value="0x910D"/>
|
||||
|
||||
<function name="TexStorage3DMultisampleOES" alias="TexStorage3DMultisample" es2="3.1">
|
||||
<param name="target" type="GLenum"/>
|
||||
<param name="samples" type="GLsizei"/>
|
||||
<param name="internalformat" type="GLenum"/>
|
||||
<param name="width" type="GLsizei"/>
|
||||
<param name="height" type="GLsizei"/>
|
||||
<param name="depth" type="GLsizei"/>
|
||||
<param name="fixedsamplelocations" type="GLboolean"/>
|
||||
</function>
|
||||
</category>
|
||||
|
||||
</OpenGLAPI>
|
||||
|
|
|
|||
|
|
@ -8168,7 +8168,7 @@
|
|||
|
||||
<xi:include href="ARB_texture_storage.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
|
||||
|
||||
<!-- ARB extension #118 -->
|
||||
<xi:include href="KHR_texture_compression_astc.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
|
||||
|
||||
<xi:include href="KHR_debug.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
|
||||
|
||||
|
|
|
|||
|
|
@ -144,12 +144,6 @@ class PrintGenericStubs(gl_XML.gl_print_base):
|
|||
print ''
|
||||
print '#ifdef GLX_USE_TLS'
|
||||
print ''
|
||||
print '\t.globl _x86_64_get_get_dispatch; HIDDEN(_x86_64_get_get_dispatch)'
|
||||
print '_x86_64_get_get_dispatch:'
|
||||
print '\tlea\t_x86_64_get_dispatch(%rip), %rax'
|
||||
print '\tret'
|
||||
print ''
|
||||
print '\t.p2align\t4,,15'
|
||||
print '_x86_64_get_dispatch:'
|
||||
print '\tmovq\t_glapi_tls_Dispatch@GOTTPOFF(%rip), %rax'
|
||||
print '\tmovq\t%fs:(%rax), %rax'
|
||||
|
|
|
|||
|
|
@ -278,6 +278,7 @@
|
|||
#define GEN8_SURFACE_TILING_W (1 << 12)
|
||||
#define GEN8_SURFACE_TILING_X (2 << 12)
|
||||
#define GEN8_SURFACE_TILING_Y (3 << 12)
|
||||
#define GEN8_SURFACE_SAMPLER_L2_BYPASS_DISABLE (1 << 9)
|
||||
#define BRW_SURFACE_RC_READ_WRITE (1 << 8)
|
||||
#define BRW_SURFACE_MIPLAYOUT_SHIFT 10
|
||||
#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
|
||||
|
|
@ -506,6 +507,38 @@
|
|||
#define BRW_SURFACEFORMAT_R8G8B8_UINT 0x1C8
|
||||
#define BRW_SURFACEFORMAT_R8G8B8_SINT 0x1C9
|
||||
#define BRW_SURFACEFORMAT_RAW 0x1FF
|
||||
|
||||
#define GEN9_SURFACE_ASTC_HDR_FORMAT_BIT 0x100
|
||||
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_U8sRGB 0x200
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_U8sRGB 0x208
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_U8sRGB 0x209
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_U8sRGB 0x211
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_U8sRGB 0x212
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_U8sRGB 0x221
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_U8sRGB 0x222
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_U8sRGB 0x224
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_U8sRGB 0x231
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_U8sRGB 0x232
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_U8sRGB 0x234
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_U8sRGB 0x236
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_U8sRGB 0x23E
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_U8sRGB 0x23F
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_FLT16 0x240
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_FLT16 0x248
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_FLT16 0x249
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_FLT16 0x251
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_FLT16 0x252
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_FLT16 0x261
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_FLT16 0x262
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_FLT16 0x264
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_FLT16 0x271
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_FLT16 0x272
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_FLT16 0x274
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_FLT16 0x276
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_FLT16 0x27E
|
||||
#define BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_FLT16 0x27F
|
||||
|
||||
#define BRW_SURFACE_FORMAT_SHIFT 18
|
||||
#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
|
||||
|
||||
|
|
|
|||
|
|
@ -427,7 +427,9 @@ fs_reg::equals(const fs_reg &r) const
|
|||
negate == r.negate &&
|
||||
abs == r.abs &&
|
||||
!reladdr && !r.reladdr &&
|
||||
memcmp(&fixed_hw_reg, &r.fixed_hw_reg, sizeof(fixed_hw_reg)) == 0 &&
|
||||
((file != HW_REG && file != IMM) ||
|
||||
memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
|
||||
sizeof(fixed_hw_reg)) == 0) &&
|
||||
stride == r.stride);
|
||||
}
|
||||
|
||||
|
|
@ -1789,54 +1791,46 @@ fs_visitor::assign_constant_locations()
|
|||
if (dispatch_width != 8)
|
||||
return;
|
||||
|
||||
unsigned int num_pull_constants = 0;
|
||||
|
||||
pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
|
||||
memset(pull_constant_loc, -1, sizeof(pull_constant_loc[0]) * uniforms);
|
||||
|
||||
/* Walk through and find array access of uniforms. Put a copy of that
|
||||
* uniform in the pull constant buffer.
|
||||
bool is_live[uniforms];
|
||||
memset(is_live, 0, sizeof(is_live));
|
||||
|
||||
/* First, we walk through the instructions and do two things:
|
||||
*
|
||||
* 1) Figure out which uniforms are live.
|
||||
*
|
||||
* 2) Find all indirect access of uniform arrays and flag them as needing
|
||||
* to go into the pull constant buffer.
|
||||
*
|
||||
* Note that we don't move constant-indexed accesses to arrays. No
|
||||
* testing has been done of the performance impact of this choice.
|
||||
*/
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
|
||||
for (int i = 0 ; i < inst->sources; i++) {
|
||||
if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
|
||||
continue;
|
||||
|
||||
int uniform = inst->src[i].reg;
|
||||
|
||||
/* If this array isn't already present in the pull constant buffer,
|
||||
* add it.
|
||||
*/
|
||||
if (pull_constant_loc[uniform] == -1) {
|
||||
const gl_constant_value **values = &stage_prog_data->param[uniform];
|
||||
|
||||
assert(param_size[uniform]);
|
||||
|
||||
for (int j = 0; j < param_size[uniform]; j++) {
|
||||
pull_constant_loc[uniform + j] = stage_prog_data->nr_pull_params;
|
||||
|
||||
stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] =
|
||||
values[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Find which UNIFORM registers are still in use. */
|
||||
bool is_live[uniforms];
|
||||
for (unsigned int i = 0; i < uniforms; i++) {
|
||||
is_live[i] = false;
|
||||
}
|
||||
|
||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file != UNIFORM)
|
||||
continue;
|
||||
|
||||
int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
|
||||
if (constant_nr >= 0 && constant_nr < (int) uniforms)
|
||||
is_live[constant_nr] = true;
|
||||
if (inst->src[i].reladdr) {
|
||||
int uniform = inst->src[i].reg;
|
||||
|
||||
/* If this array isn't already present in the pull constant buffer,
|
||||
* add it.
|
||||
*/
|
||||
if (pull_constant_loc[uniform] == -1) {
|
||||
assert(param_size[uniform]);
|
||||
for (int j = 0; j < param_size[uniform]; j++)
|
||||
pull_constant_loc[uniform + j] = num_pull_constants++;
|
||||
}
|
||||
} else {
|
||||
/* Mark the one accessed uniform as live */
|
||||
int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
|
||||
if (constant_nr >= 0 && constant_nr < (int) uniforms)
|
||||
is_live[constant_nr] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1870,27 +1864,29 @@ fs_visitor::assign_constant_locations()
|
|||
} else {
|
||||
/* Demote to a pull constant. */
|
||||
push_constant_loc[i] = -1;
|
||||
|
||||
int pull_index = stage_prog_data->nr_pull_params++;
|
||||
stage_prog_data->pull_param[pull_index] = stage_prog_data->param[i];
|
||||
pull_constant_loc[i] = pull_index;
|
||||
pull_constant_loc[i] = num_pull_constants++;
|
||||
}
|
||||
}
|
||||
|
||||
stage_prog_data->nr_params = num_push_constants;
|
||||
stage_prog_data->nr_pull_params = num_pull_constants;
|
||||
|
||||
/* Up until now, the param[] array has been indexed by reg + reg_offset
|
||||
* of UNIFORM registers. Condense it to only contain the uniforms we
|
||||
* chose to upload as push constants.
|
||||
* of UNIFORM registers. Move pull constants into pull_param[] and
|
||||
* condense param[] to only contain the uniforms we chose to push.
|
||||
*
|
||||
* NOTE: Because we are condensing the params[] array, we know that
|
||||
* push_constant_loc[i] <= i and we can do it in one smooth loop without
|
||||
* having to make a copy.
|
||||
*/
|
||||
for (unsigned int i = 0; i < uniforms; i++) {
|
||||
int remapped = push_constant_loc[i];
|
||||
const gl_constant_value *value = stage_prog_data->param[i];
|
||||
|
||||
if (remapped == -1)
|
||||
continue;
|
||||
|
||||
assert(remapped <= (int)i);
|
||||
stage_prog_data->param[remapped] = stage_prog_data->param[i];
|
||||
if (pull_constant_loc[i] != -1) {
|
||||
stage_prog_data->pull_param[pull_constant_loc[i]] = value;
|
||||
} else if (push_constant_loc[i] != -1) {
|
||||
stage_prog_data->param[push_constant_loc[i]] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4806,11 +4802,11 @@ fs_visitor::optimize()
|
|||
*/
|
||||
bld = fs_builder(this, 64);
|
||||
|
||||
split_virtual_grfs();
|
||||
|
||||
assign_constant_locations();
|
||||
demote_pull_constants();
|
||||
|
||||
split_virtual_grfs();
|
||||
|
||||
#define OPT(pass, args...) ({ \
|
||||
pass_num++; \
|
||||
bool this_progress = pass(args); \
|
||||
|
|
|
|||
|
|
@ -225,7 +225,6 @@ public:
|
|||
void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
|
||||
fs_reg resolve_source_modifiers(const fs_reg &src);
|
||||
void emit_discard_jump();
|
||||
bool try_replace_with_sel();
|
||||
bool opt_peephole_sel();
|
||||
bool opt_peephole_predicated_break();
|
||||
bool opt_saturate_propagation();
|
||||
|
|
|
|||
|
|
@ -372,6 +372,8 @@ namespace brw {
|
|||
emit_minmax(const dst_reg &dst, const src_reg &src0,
|
||||
const src_reg &src1, brw_conditional_mod mod) const
|
||||
{
|
||||
assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
|
||||
|
||||
if (shader->devinfo->gen >= 6) {
|
||||
set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
|
||||
fix_unsigned_negate(src1)));
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
|
|||
|
||||
switch (stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
for (int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) {
|
||||
for (unsigned int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) {
|
||||
int output = var->data.location + i;
|
||||
this->outputs[output] = offset(reg, bld, 4 * i);
|
||||
this->output_components[output] = vector_elements;
|
||||
|
|
@ -191,8 +191,8 @@ fs_visitor::nir_setup_uniforms(nir_shader *shader)
|
|||
nir_setup_builtin_uniform(var);
|
||||
else
|
||||
nir_setup_uniform(var);
|
||||
|
||||
param_size[var->data.driver_location] = type_size_scalar(var->type);
|
||||
if(type_size_scalar(var->type) > 0)
|
||||
param_size[var->data.driver_location] = type_size_scalar(var->type);
|
||||
}
|
||||
} else {
|
||||
/* prog_to_nir only creates a single giant uniform variable so we can
|
||||
|
|
@ -203,7 +203,8 @@ fs_visitor::nir_setup_uniforms(nir_shader *shader)
|
|||
&prog->Parameters->ParameterValues[p][i];
|
||||
}
|
||||
}
|
||||
param_size[0] = prog->Parameters->NumParameters * 4;
|
||||
if(prog->Parameters->NumParameters > 0)
|
||||
param_size[0] = prog->Parameters->NumParameters * 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -416,8 +417,6 @@ fs_visitor::nir_emit_if(nir_if *if_stmt)
|
|||
nir_emit_cf_list(&if_stmt->else_list);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
|
||||
try_replace_with_sel();
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -95,42 +95,51 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
|
|||
static bool
|
||||
can_coalesce_vars(brw::fs_live_variables *live_intervals,
|
||||
const cfg_t *cfg, const fs_inst *inst,
|
||||
int var_to, int var_from)
|
||||
int dst_var, int src_var)
|
||||
{
|
||||
if (!live_intervals->vars_interfere(var_from, var_to))
|
||||
if (!live_intervals->vars_interfere(src_var, dst_var))
|
||||
return true;
|
||||
|
||||
int start_to = live_intervals->start[var_to];
|
||||
int end_to = live_intervals->end[var_to];
|
||||
int start_from = live_intervals->start[var_from];
|
||||
int end_from = live_intervals->end[var_from];
|
||||
int dst_start = live_intervals->start[dst_var];
|
||||
int dst_end = live_intervals->end[dst_var];
|
||||
int src_start = live_intervals->start[src_var];
|
||||
int src_end = live_intervals->end[src_var];
|
||||
|
||||
/* Variables interfere and one live range isn't a subset of the other. */
|
||||
if ((end_to > end_from && start_from < start_to) ||
|
||||
(end_from > end_to && start_to < start_from))
|
||||
if ((dst_end > src_end && src_start < dst_start) ||
|
||||
(src_end > dst_end && dst_start < src_start))
|
||||
return false;
|
||||
|
||||
int start_ip = MIN2(start_to, start_from);
|
||||
int scan_ip = -1;
|
||||
/* Check for a write to either register in the intersection of their live
|
||||
* ranges.
|
||||
*/
|
||||
int start_ip = MAX2(dst_start, src_start);
|
||||
int end_ip = MIN2(dst_end, src_end);
|
||||
|
||||
foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
|
||||
scan_ip++;
|
||||
|
||||
if (scan_ip < start_ip)
|
||||
foreach_block(block, cfg) {
|
||||
if (block->end_ip < start_ip)
|
||||
continue;
|
||||
|
||||
if (scan_inst->is_control_flow())
|
||||
return false;
|
||||
int scan_ip = block->start_ip - 1;
|
||||
|
||||
if (scan_ip <= live_intervals->start[var_to])
|
||||
continue;
|
||||
foreach_inst_in_block(fs_inst, scan_inst, block) {
|
||||
scan_ip++;
|
||||
|
||||
if (scan_ip > live_intervals->end[var_to])
|
||||
return true;
|
||||
/* Ignore anything before the intersection of the live ranges */
|
||||
if (scan_ip < start_ip)
|
||||
continue;
|
||||
|
||||
if (scan_inst->dst.equals(inst->dst) ||
|
||||
scan_inst->dst.equals(inst->src[0]))
|
||||
return false;
|
||||
/* Ignore the copying instruction itself */
|
||||
if (scan_inst == inst)
|
||||
continue;
|
||||
|
||||
if (scan_ip > end_ip)
|
||||
return true; /* registers do not interfere */
|
||||
|
||||
if (scan_inst->overwrites_reg(inst->dst) ||
|
||||
scan_inst->overwrites_reg(inst->src[0]))
|
||||
return false; /* registers interfere */
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -145,11 +154,11 @@ fs_visitor::register_coalesce()
|
|||
|
||||
int src_size = 0;
|
||||
int channels_remaining = 0;
|
||||
int reg_from = -1, reg_to = -1;
|
||||
int reg_to_offset[MAX_VGRF_SIZE];
|
||||
int src_reg = -1, dst_reg = -1;
|
||||
int dst_reg_offset[MAX_VGRF_SIZE];
|
||||
fs_inst *mov[MAX_VGRF_SIZE];
|
||||
int var_to[MAX_VGRF_SIZE];
|
||||
int var_from[MAX_VGRF_SIZE];
|
||||
int dst_var[MAX_VGRF_SIZE];
|
||||
int src_var[MAX_VGRF_SIZE];
|
||||
|
||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||
if (!is_coalesce_candidate(this, inst))
|
||||
|
|
@ -161,8 +170,8 @@ fs_visitor::register_coalesce()
|
|||
continue;
|
||||
}
|
||||
|
||||
if (reg_from != inst->src[0].reg) {
|
||||
reg_from = inst->src[0].reg;
|
||||
if (src_reg != inst->src[0].reg) {
|
||||
src_reg = inst->src[0].reg;
|
||||
|
||||
src_size = alloc.sizes[inst->src[0].reg];
|
||||
assert(src_size <= MAX_VGRF_SIZE);
|
||||
|
|
@ -170,15 +179,15 @@ fs_visitor::register_coalesce()
|
|||
channels_remaining = src_size;
|
||||
memset(mov, 0, sizeof(mov));
|
||||
|
||||
reg_to = inst->dst.reg;
|
||||
dst_reg = inst->dst.reg;
|
||||
}
|
||||
|
||||
if (reg_to != inst->dst.reg)
|
||||
if (dst_reg != inst->dst.reg)
|
||||
continue;
|
||||
|
||||
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
|
||||
for (int i = 0; i < src_size; i++) {
|
||||
reg_to_offset[i] = i;
|
||||
dst_reg_offset[i] = i;
|
||||
}
|
||||
mov[0] = inst;
|
||||
channels_remaining -= inst->regs_written;
|
||||
|
|
@ -194,9 +203,9 @@ fs_visitor::register_coalesce()
|
|||
channels_remaining = -1;
|
||||
continue;
|
||||
}
|
||||
reg_to_offset[offset] = inst->dst.reg_offset;
|
||||
dst_reg_offset[offset] = inst->dst.reg_offset;
|
||||
if (inst->regs_written > 1)
|
||||
reg_to_offset[offset + 1] = inst->dst.reg_offset + 1;
|
||||
dst_reg_offset[offset + 1] = inst->dst.reg_offset + 1;
|
||||
mov[offset] = inst;
|
||||
channels_remaining -= inst->regs_written;
|
||||
}
|
||||
|
|
@ -206,20 +215,20 @@ fs_visitor::register_coalesce()
|
|||
|
||||
bool can_coalesce = true;
|
||||
for (int i = 0; i < src_size; i++) {
|
||||
if (reg_to_offset[i] != reg_to_offset[0] + i) {
|
||||
if (dst_reg_offset[i] != dst_reg_offset[0] + i) {
|
||||
/* Registers are out-of-order. */
|
||||
can_coalesce = false;
|
||||
reg_from = -1;
|
||||
src_reg = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
var_to[i] = live_intervals->var_from_vgrf[reg_to] + reg_to_offset[i];
|
||||
var_from[i] = live_intervals->var_from_vgrf[reg_from] + i;
|
||||
dst_var[i] = live_intervals->var_from_vgrf[dst_reg] + dst_reg_offset[i];
|
||||
src_var[i] = live_intervals->var_from_vgrf[src_reg] + i;
|
||||
|
||||
if (!can_coalesce_vars(live_intervals, cfg, inst,
|
||||
var_to[i], var_from[i])) {
|
||||
dst_var[i], src_var[i])) {
|
||||
can_coalesce = false;
|
||||
reg_from = -1;
|
||||
src_reg = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -242,31 +251,31 @@ fs_visitor::register_coalesce()
|
|||
|
||||
foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
|
||||
if (scan_inst->dst.file == GRF &&
|
||||
scan_inst->dst.reg == reg_from) {
|
||||
scan_inst->dst.reg = reg_to;
|
||||
scan_inst->dst.reg == src_reg) {
|
||||
scan_inst->dst.reg = dst_reg;
|
||||
scan_inst->dst.reg_offset =
|
||||
reg_to_offset[scan_inst->dst.reg_offset];
|
||||
dst_reg_offset[scan_inst->dst.reg_offset];
|
||||
}
|
||||
|
||||
for (int j = 0; j < scan_inst->sources; j++) {
|
||||
if (scan_inst->src[j].file == GRF &&
|
||||
scan_inst->src[j].reg == reg_from) {
|
||||
scan_inst->src[j].reg = reg_to;
|
||||
scan_inst->src[j].reg == src_reg) {
|
||||
scan_inst->src[j].reg = dst_reg;
|
||||
scan_inst->src[j].reg_offset =
|
||||
reg_to_offset[scan_inst->src[j].reg_offset];
|
||||
dst_reg_offset[scan_inst->src[j].reg_offset];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < src_size; i++) {
|
||||
live_intervals->start[var_to[i]] =
|
||||
MIN2(live_intervals->start[var_to[i]],
|
||||
live_intervals->start[var_from[i]]);
|
||||
live_intervals->end[var_to[i]] =
|
||||
MAX2(live_intervals->end[var_to[i]],
|
||||
live_intervals->end[var_from[i]]);
|
||||
live_intervals->start[dst_var[i]] =
|
||||
MIN2(live_intervals->start[dst_var[i]],
|
||||
live_intervals->start[src_var[i]]);
|
||||
live_intervals->end[dst_var[i]] =
|
||||
MAX2(live_intervals->end[dst_var[i]],
|
||||
live_intervals->end[src_var[i]]);
|
||||
}
|
||||
reg_from = -1;
|
||||
src_reg = -1;
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
|
|
|
|||
|
|
@ -686,7 +686,7 @@ namespace {
|
|||
if (is_signed)
|
||||
bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
|
||||
fs_reg(-(int)scale(widths[c] - s) - 1),
|
||||
BRW_CONDITIONAL_G);
|
||||
BRW_CONDITIONAL_GE);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -717,7 +717,7 @@ namespace {
|
|||
if (is_signed)
|
||||
bld.emit_minmax(offset(dst, bld, c),
|
||||
offset(dst, bld, c), fs_reg(-1.0f),
|
||||
BRW_CONDITIONAL_G);
|
||||
BRW_CONDITIONAL_GE);
|
||||
}
|
||||
}
|
||||
return dst;
|
||||
|
|
@ -741,7 +741,7 @@ namespace {
|
|||
/* Clamp the normalized floating-point argument. */
|
||||
if (is_signed) {
|
||||
bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c),
|
||||
fs_reg(-1.0f), BRW_CONDITIONAL_G);
|
||||
fs_reg(-1.0f), BRW_CONDITIONAL_GE);
|
||||
|
||||
bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
|
||||
fs_reg(1.0f), BRW_CONDITIONAL_L);
|
||||
|
|
@ -812,7 +812,7 @@ namespace {
|
|||
/* Clamp to the minimum value. */
|
||||
if (widths[c] < 16)
|
||||
bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
|
||||
fs_reg(0.0f), BRW_CONDITIONAL_G);
|
||||
fs_reg(0.0f), BRW_CONDITIONAL_GE);
|
||||
|
||||
/* Convert to 16-bit floating-point. */
|
||||
bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));
|
||||
|
|
|
|||
|
|
@ -441,95 +441,6 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to replace IF/MOV/ELSE/MOV/ENDIF with SEL.
|
||||
*
|
||||
* Many GLSL shaders contain the following pattern:
|
||||
*
|
||||
* x = condition ? foo : bar
|
||||
*
|
||||
* The compiler emits an ir_if tree for this, since each subexpression might be
|
||||
* a complex tree that could have side-effects or short-circuit logic.
|
||||
*
|
||||
* However, the common case is to simply select one of two constants or
|
||||
* variable values---which is exactly what SEL is for. In this case, the
|
||||
* assembly looks like:
|
||||
*
|
||||
* (+f0) IF
|
||||
* MOV dst src0
|
||||
* ELSE
|
||||
* MOV dst src1
|
||||
* ENDIF
|
||||
*
|
||||
* which can be easily translated into:
|
||||
*
|
||||
* (+f0) SEL dst src0 src1
|
||||
*
|
||||
* If src0 is an immediate value, we promote it to a temporary GRF.
|
||||
*/
|
||||
bool
|
||||
fs_visitor::try_replace_with_sel()
|
||||
{
|
||||
fs_inst *endif_inst = (fs_inst *) instructions.get_tail();
|
||||
assert(endif_inst->opcode == BRW_OPCODE_ENDIF);
|
||||
|
||||
/* Pattern match in reverse: IF, MOV, ELSE, MOV, ENDIF. */
|
||||
int opcodes[] = {
|
||||
BRW_OPCODE_IF, BRW_OPCODE_MOV, BRW_OPCODE_ELSE, BRW_OPCODE_MOV,
|
||||
};
|
||||
|
||||
fs_inst *match = (fs_inst *) endif_inst->prev;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (match->is_head_sentinel() || match->opcode != opcodes[4-i-1])
|
||||
return false;
|
||||
match = (fs_inst *) match->prev;
|
||||
}
|
||||
|
||||
/* The opcodes match; it looks like the right sequence of instructions. */
|
||||
fs_inst *else_mov = (fs_inst *) endif_inst->prev;
|
||||
fs_inst *then_mov = (fs_inst *) else_mov->prev->prev;
|
||||
fs_inst *if_inst = (fs_inst *) then_mov->prev;
|
||||
|
||||
/* Check that the MOVs are the right form. */
|
||||
if (then_mov->dst.equals(else_mov->dst) &&
|
||||
!then_mov->is_partial_write() &&
|
||||
!else_mov->is_partial_write()) {
|
||||
|
||||
/* Remove the matched instructions; we'll emit a SEL to replace them. */
|
||||
while (!if_inst->next->is_tail_sentinel())
|
||||
if_inst->next->exec_node::remove();
|
||||
if_inst->exec_node::remove();
|
||||
|
||||
/* Only the last source register can be a constant, so if the MOV in
|
||||
* the "then" clause uses a constant, we need to put it in a temporary.
|
||||
*/
|
||||
fs_reg src0(then_mov->src[0]);
|
||||
if (src0.file == IMM) {
|
||||
src0 = vgrf(glsl_type::float_type);
|
||||
src0.type = then_mov->src[0].type;
|
||||
bld.MOV(src0, then_mov->src[0]);
|
||||
}
|
||||
|
||||
if (if_inst->conditional_mod) {
|
||||
/* Sandybridge-specific IF with embedded comparison */
|
||||
bld.CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1],
|
||||
if_inst->conditional_mod);
|
||||
set_predicate(BRW_PREDICATE_NORMAL,
|
||||
bld.emit(BRW_OPCODE_SEL, then_mov->dst,
|
||||
src0, else_mov->src[0]));
|
||||
} else {
|
||||
/* Separate CMP and IF instructions */
|
||||
set_predicate_inv(if_inst->predicate, if_inst->predicate_inverse,
|
||||
bld.emit(BRW_OPCODE_SEL, then_mov->dst,
|
||||
src0, else_mov->src[0]));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
|
||||
void
|
||||
fs_visitor::emit_dummy_fs()
|
||||
|
|
|
|||
|
|
@ -63,6 +63,8 @@ nir_optimize(nir_shader *nir, bool is_scalar)
|
|||
nir_validate_shader(nir);
|
||||
progress |= nir_opt_remove_phis(nir);
|
||||
nir_validate_shader(nir);
|
||||
progress |= nir_opt_undef(nir);
|
||||
nir_validate_shader(nir);
|
||||
} while (progress);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -307,6 +307,34 @@ const struct surface_format_info surface_formats[] = {
|
|||
SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8)
|
||||
SF( x, x, x, x, x, x, x, x, x, R8G8B8_UINT)
|
||||
SF( x, x, x, x, x, x, x, x, x, R8G8B8_SINT)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_FLT16)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_U8sRGB)
|
||||
SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_U8sRGB)
|
||||
};
|
||||
#undef x
|
||||
#undef Y
|
||||
|
|
@ -503,6 +531,35 @@ brw_format_for_mesa_format(mesa_format mesa_format)
|
|||
[MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT] = BRW_SURFACEFORMAT_BC6H_SF16,
|
||||
[MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT] = BRW_SURFACEFORMAT_BC6H_UF16,
|
||||
|
||||
[MESA_FORMAT_RGBA_ASTC_4x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_5x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_5x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_6x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_6x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_8x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_8x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_8x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_10x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_10x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_10x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_10x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_12x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_FLT16,
|
||||
[MESA_FORMAT_RGBA_ASTC_12x12] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_FLT16,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_4x4_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x4_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_5x5_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x5_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_6x6_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x5_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x6_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_8x8_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x5_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x6_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x8_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_10x10_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x10_U8sRGB,
|
||||
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12] = BRW_SURFACEFORMAT_ASTC_LDR_2D_12x12_U8sRGB,
|
||||
|
||||
[MESA_FORMAT_A_SNORM8] = 0,
|
||||
[MESA_FORMAT_L_SNORM8] = 0,
|
||||
[MESA_FORMAT_L8A8_SNORM] = 0,
|
||||
|
|
@ -768,6 +825,36 @@ translate_tex_format(struct brw_context *brw,
|
|||
}
|
||||
return brw_format_for_mesa_format(mesa_format);
|
||||
|
||||
case MESA_FORMAT_RGBA_ASTC_4x4:
|
||||
case MESA_FORMAT_RGBA_ASTC_5x4:
|
||||
case MESA_FORMAT_RGBA_ASTC_5x5:
|
||||
case MESA_FORMAT_RGBA_ASTC_6x5:
|
||||
case MESA_FORMAT_RGBA_ASTC_6x6:
|
||||
case MESA_FORMAT_RGBA_ASTC_8x5:
|
||||
case MESA_FORMAT_RGBA_ASTC_8x6:
|
||||
case MESA_FORMAT_RGBA_ASTC_8x8:
|
||||
case MESA_FORMAT_RGBA_ASTC_10x5:
|
||||
case MESA_FORMAT_RGBA_ASTC_10x6:
|
||||
case MESA_FORMAT_RGBA_ASTC_10x8:
|
||||
case MESA_FORMAT_RGBA_ASTC_10x10:
|
||||
case MESA_FORMAT_RGBA_ASTC_12x10:
|
||||
case MESA_FORMAT_RGBA_ASTC_12x12: {
|
||||
GLuint brw_fmt = brw_format_for_mesa_format(mesa_format);
|
||||
|
||||
/**
|
||||
* On Gen9+, it is possible to process these formats using the LDR
|
||||
* Profile or the Full Profile mode of the hardware. Because it isn't
|
||||
* possible to determine if an HDR or LDR texture is being rendered, we
|
||||
* can't determine which mode to enable in the hardware. Therefore, to
|
||||
* handle all cases, always default to Full profile unless we are
|
||||
* processing sRGBs, which are incompatible with this mode.
|
||||
*/
|
||||
if (brw->gen >= 9)
|
||||
brw_fmt |= GEN9_SURFACE_ASTC_HDR_FORMAT_BIT;
|
||||
|
||||
return brw_fmt;
|
||||
}
|
||||
|
||||
default:
|
||||
assert(brw_format_for_mesa_format(mesa_format) != 0);
|
||||
return brw_format_for_mesa_format(mesa_format);
|
||||
|
|
|
|||
|
|
@ -123,12 +123,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw,
|
|||
return 16;
|
||||
|
||||
/**
|
||||
* From the "Alignment Unit Size" section of various specs, namely:
|
||||
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
|
||||
* - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
|
||||
* - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
|
||||
* - BSpec (for Ivybridge and slight variations in separate stencil)
|
||||
*
|
||||
* +----------------------------------------------------------------------+
|
||||
* | | alignment unit width ("i") |
|
||||
* | Surface Property |-----------------------------|
|
||||
|
|
@ -146,32 +140,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw,
|
|||
* On IVB+, non-special cases can be overridden by setting the SURFACE_STATE
|
||||
* "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8.
|
||||
*/
|
||||
if (_mesa_is_format_compressed(mt->format)) {
|
||||
/* The hardware alignment requirements for compressed textures
|
||||
* happen to match the block boundaries.
|
||||
*/
|
||||
unsigned int i, j;
|
||||
_mesa_get_format_block_size(mt->format, &i, &j);
|
||||
|
||||
/* On Gen9+ we can pick our own alignment for compressed textures but it
|
||||
* has to be a multiple of the block size. The minimum alignment we can
|
||||
* pick is 4 so we effectively have to align to 4 times the block
|
||||
* size
|
||||
*/
|
||||
if (brw->gen >= 9)
|
||||
return i * 4;
|
||||
else
|
||||
return i;
|
||||
}
|
||||
|
||||
if (mt->format == MESA_FORMAT_S_UINT8)
|
||||
return 8;
|
||||
|
||||
if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
|
||||
uint32_t align = tr_mode_horizontal_texture_alignment(brw, mt);
|
||||
/* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32. */
|
||||
return align < 32 ? 32 : align;
|
||||
}
|
||||
|
||||
if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16)
|
||||
return 8;
|
||||
|
|
@ -248,12 +216,6 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw,
|
|||
const struct intel_mipmap_tree *mt)
|
||||
{
|
||||
/**
|
||||
* From the "Alignment Unit Size" section of various specs, namely:
|
||||
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
|
||||
* - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
|
||||
* - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
|
||||
* - BSpec (for Ivybridge and slight variations in separate stencil)
|
||||
*
|
||||
* +----------------------------------------------------------------------+
|
||||
* | | alignment unit height ("j") |
|
||||
* | Surface Property |-----------------------------|
|
||||
|
|
@ -270,18 +232,6 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw,
|
|||
* Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of
|
||||
* the SURFACE_STATE "Surface Vertical Alignment" field.
|
||||
*/
|
||||
if (_mesa_is_format_compressed(mt->format))
|
||||
/* See comment above for the horizontal alignment */
|
||||
return brw->gen >= 9 ? 16 : 4;
|
||||
|
||||
if (mt->format == MESA_FORMAT_S_UINT8)
|
||||
return brw->gen >= 7 ? 8 : 4;
|
||||
|
||||
if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
|
||||
uint32_t align = tr_mode_vertical_texture_alignment(brw, mt);
|
||||
/* XY_FAST_COPY_BLT doesn't support vertical alignment < 64 */
|
||||
return align < 64 ? 64 : align;
|
||||
}
|
||||
|
||||
/* Broadwell only supports VALIGN of 4, 8, and 16. The BSpec says 4
|
||||
* should always be used, except for stencil buffers, which should be 8.
|
||||
|
|
@ -367,7 +317,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
|
|||
mt->total_width = mt->physical_width0;
|
||||
|
||||
if (mt->compressed)
|
||||
mt->total_width = ALIGN(mt->total_width, bw);
|
||||
mt->total_width = ALIGN_NPOT(mt->total_width, bw);
|
||||
|
||||
/* May need to adjust width to accommodate the placement of
|
||||
* the 2nd mipmap. This occurs when the alignment
|
||||
|
|
@ -378,10 +328,10 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
|
|||
unsigned mip1_width;
|
||||
|
||||
if (mt->compressed) {
|
||||
mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
|
||||
ALIGN(minify(mt->physical_width0, 2), bw);
|
||||
mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) +
|
||||
ALIGN_NPOT(minify(mt->physical_width0, 2), bw);
|
||||
} else {
|
||||
mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
|
||||
mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) +
|
||||
minify(mt->physical_width0, 2);
|
||||
}
|
||||
|
||||
|
|
@ -390,6 +340,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
|
|||
}
|
||||
}
|
||||
|
||||
mt->total_width /= bw;
|
||||
mt->total_height = 0;
|
||||
|
||||
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
|
||||
|
|
@ -397,7 +348,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
|
|||
|
||||
intel_miptree_set_level_info(mt, level, x, y, depth);
|
||||
|
||||
img_height = ALIGN(height, mt->align_h);
|
||||
img_height = ALIGN_NPOT(height, mt->align_h);
|
||||
if (mt->compressed)
|
||||
img_height /= bh;
|
||||
|
||||
|
|
@ -414,7 +365,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
|
|||
/* Layout_below: step right after second mipmap.
|
||||
*/
|
||||
if (level == mt->first_level + 1) {
|
||||
x += ALIGN(width, mt->align_w);
|
||||
x += ALIGN_NPOT(width, mt->align_w) / bw;
|
||||
} else {
|
||||
y += img_height;
|
||||
}
|
||||
|
|
@ -434,7 +385,7 @@ brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw,
|
|||
{
|
||||
if ((brw->gen < 9 && mt->target == GL_TEXTURE_3D) ||
|
||||
(brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) {
|
||||
return ALIGN(minify(mt->physical_width0, level), mt->align_w);
|
||||
return ALIGN_NPOT(minify(mt->physical_width0, level), mt->align_w);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -475,11 +426,11 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
|
|||
} else if (mt->target == GL_TEXTURE_3D ||
|
||||
(brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP) ||
|
||||
mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
|
||||
return ALIGN(minify(mt->physical_height0, level), mt->align_h);
|
||||
return ALIGN_NPOT(minify(mt->physical_height0, level), mt->align_h);
|
||||
|
||||
} else {
|
||||
const unsigned h0 = ALIGN(mt->physical_height0, mt->align_h);
|
||||
const unsigned h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h);
|
||||
const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->align_h);
|
||||
const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->align_h);
|
||||
|
||||
return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h;
|
||||
}
|
||||
|
|
@ -551,7 +502,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
|
|||
|
||||
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
|
||||
unsigned img_height;
|
||||
img_height = ALIGN(height, mt->align_h);
|
||||
img_height = ALIGN_NPOT(height, mt->align_h);
|
||||
if (mt->compressed)
|
||||
img_height /= mt->align_h;
|
||||
|
||||
|
|
@ -574,18 +525,20 @@ static void
|
|||
brw_miptree_layout_texture_3d(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt)
|
||||
{
|
||||
unsigned yscale = mt->compressed ? 4 : 1;
|
||||
|
||||
mt->total_width = 0;
|
||||
mt->total_height = 0;
|
||||
|
||||
unsigned ysum = 0;
|
||||
unsigned bh, bw;
|
||||
|
||||
_mesa_get_format_block_size(mt->format, &bw, &bh);
|
||||
|
||||
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
|
||||
unsigned WL = MAX2(mt->physical_width0 >> level, 1);
|
||||
unsigned HL = MAX2(mt->physical_height0 >> level, 1);
|
||||
unsigned DL = MAX2(mt->physical_depth0 >> level, 1);
|
||||
unsigned wL = ALIGN(WL, mt->align_w);
|
||||
unsigned hL = ALIGN(HL, mt->align_h);
|
||||
unsigned wL = ALIGN_NPOT(WL, mt->align_w);
|
||||
unsigned hL = ALIGN_NPOT(HL, mt->align_h);
|
||||
|
||||
if (mt->target == GL_TEXTURE_CUBE_MAP)
|
||||
DL = 6;
|
||||
|
|
@ -596,9 +549,9 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
|
|||
unsigned x = (q % (1 << level)) * wL;
|
||||
unsigned y = ysum + (q >> level) * hL;
|
||||
|
||||
intel_miptree_set_image_offset(mt, level, q, x, y / yscale);
|
||||
mt->total_width = MAX2(mt->total_width, x + wL);
|
||||
mt->total_height = MAX2(mt->total_height, (y + hL) / yscale);
|
||||
intel_miptree_set_image_offset(mt, level, q, x / bw, y / bh);
|
||||
mt->total_width = MAX2(mt->total_width, (x + wL) / bw);
|
||||
mt->total_height = MAX2(mt->total_height, (y + hL) / bh);
|
||||
}
|
||||
|
||||
ysum += ALIGN(DL, 1 << level) / (1 << level) * hL;
|
||||
|
|
@ -767,6 +720,13 @@ intel_miptree_set_alignment(struct brw_context *brw,
|
|||
struct intel_mipmap_tree *mt,
|
||||
uint32_t layout_flags)
|
||||
{
|
||||
/**
|
||||
* From the "Alignment Unit Size" section of various specs, namely:
|
||||
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
|
||||
* - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
|
||||
* - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
|
||||
* - BSpec (for Ivybridge and slight variations in separate stencil)
|
||||
*/
|
||||
bool gen6_hiz_or_stencil = false;
|
||||
|
||||
if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
|
||||
|
|
@ -798,6 +758,29 @@ intel_miptree_set_alignment(struct brw_context *brw,
|
|||
mt->align_w = 128 / mt->cpp;
|
||||
mt->align_h = 32;
|
||||
}
|
||||
} else if (mt->compressed) {
|
||||
/* The hardware alignment requirements for compressed textures
|
||||
* happen to match the block boundaries.
|
||||
*/
|
||||
_mesa_get_format_block_size(mt->format, &mt->align_w, &mt->align_h);
|
||||
|
||||
/* On Gen9+ we can pick our own alignment for compressed textures but it
|
||||
* has to be a multiple of the block size. The minimum alignment we can
|
||||
* pick is 4 so we effectively have to align to 4 times the block
|
||||
* size
|
||||
*/
|
||||
if (brw->gen >= 9) {
|
||||
mt->align_w *= 4;
|
||||
mt->align_h *= 4;
|
||||
}
|
||||
} else if (mt->format == MESA_FORMAT_S_UINT8) {
|
||||
mt->align_w = 8;
|
||||
mt->align_h = brw->gen >= 7 ? 8 : 4;
|
||||
} else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
|
||||
/* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or
|
||||
* vertical alignment < 64. */
|
||||
mt->align_w = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32);
|
||||
mt->align_h = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64);
|
||||
} else {
|
||||
mt->align_w =
|
||||
intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
|
||||
|
|
|
|||
|
|
@ -216,8 +216,9 @@ dst_reg::equals(const dst_reg &r) const
|
|||
writemask == r.writemask &&
|
||||
(reladdr == r.reladdr ||
|
||||
(reladdr && r.reladdr && reladdr->equals(*r.reladdr))) &&
|
||||
memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
|
||||
sizeof(fixed_hw_reg)) == 0);
|
||||
((file != HW_REG && file != IMM) ||
|
||||
memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
|
||||
sizeof(fixed_hw_reg)) == 0));
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -238,6 +238,20 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
|
|||
surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
|
||||
}
|
||||
|
||||
/* From the CHV PRM, Volume 2d, page 321 (RENDER_SURFACE_STATE dword 0
|
||||
* bit 9 "Sampler L2 Bypass Mode Disable" Programming Notes):
|
||||
*
|
||||
* This bit must be set for the following surface types: BC2_UNORM
|
||||
* BC3_UNORM BC5_UNORM BC5_SNORM BC7_UNORM
|
||||
*/
|
||||
if ((brw->gen >= 9 || brw->is_cherryview) &&
|
||||
(format == BRW_SURFACEFORMAT_BC2_UNORM ||
|
||||
format == BRW_SURFACEFORMAT_BC3_UNORM ||
|
||||
format == BRW_SURFACEFORMAT_BC5_UNORM ||
|
||||
format == BRW_SURFACEFORMAT_BC5_SNORM ||
|
||||
format == BRW_SURFACEFORMAT_BC7_UNORM))
|
||||
surf[0] |= GEN8_SURFACE_SAMPLER_L2_BYPASS_DISABLE;
|
||||
|
||||
if (_mesa_is_array_texture(target) || target == GL_TEXTURE_CUBE_MAP)
|
||||
surf[0] |= GEN8_SURFACE_IS_ARRAY;
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ copy_image_with_blitter(struct brw_context *brw,
|
|||
{
|
||||
GLuint bw, bh;
|
||||
uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
|
||||
int cpp;
|
||||
|
||||
/* The blitter doesn't understand multisampling at all. */
|
||||
if (src_mt->num_samples > 0 || dst_mt->num_samples > 0)
|
||||
|
|
@ -86,16 +85,6 @@ copy_image_with_blitter(struct brw_context *brw,
|
|||
src_y /= (int)bh;
|
||||
src_width /= (int)bw;
|
||||
src_height /= (int)bh;
|
||||
|
||||
/* Inside of the miptree, the x offsets are stored in pixels while
|
||||
* the y offsets are stored in blocks. We need to scale just the x
|
||||
* offset.
|
||||
*/
|
||||
src_image_x /= bw;
|
||||
|
||||
cpp = _mesa_get_format_bytes(src_mt->format);
|
||||
} else {
|
||||
cpp = src_mt->cpp;
|
||||
}
|
||||
src_x += src_image_x;
|
||||
src_y += src_image_y;
|
||||
|
|
@ -111,18 +100,12 @@ copy_image_with_blitter(struct brw_context *brw,
|
|||
|
||||
dst_x /= (int)bw;
|
||||
dst_y /= (int)bh;
|
||||
|
||||
/* Inside of the miptree, the x offsets are stored in pixels while
|
||||
* the y offsets are stored in blocks. We need to scale just the x
|
||||
* offset.
|
||||
*/
|
||||
dst_image_x /= bw;
|
||||
}
|
||||
dst_x += dst_image_x;
|
||||
dst_y += dst_image_y;
|
||||
|
||||
return intelEmitCopyBlit(brw,
|
||||
cpp,
|
||||
src_mt->cpp,
|
||||
src_mt->pitch,
|
||||
src_mt->bo, src_mt->offset,
|
||||
src_mt->tiling,
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ static const struct dri_debug_control debug_control[] = {
|
|||
{ "optimizer", DEBUG_OPTIMIZER },
|
||||
{ "ann", DEBUG_ANNOTATION },
|
||||
{ "no8", DEBUG_NO8 },
|
||||
{ "vec4vs", DEBUG_VEC4VS },
|
||||
{ "vec4", DEBUG_VEC4VS },
|
||||
{ "spill", DEBUG_SPILL },
|
||||
{ "cs", DEBUG_CS },
|
||||
{ NULL, 0 }
|
||||
|
|
|
|||
|
|
@ -313,15 +313,7 @@ intel_miptree_create_layout(struct brw_context *brw,
|
|||
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
|
||||
mt->disable_aux_buffers = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0;
|
||||
exec_list_make_empty(&mt->hiz_map);
|
||||
|
||||
/* The cpp is bytes per (1, blockheight)-sized block for compressed
|
||||
* textures. This is why you'll see divides by blockheight all over
|
||||
*/
|
||||
unsigned bw, bh;
|
||||
_mesa_get_format_block_size(format, &bw, &bh);
|
||||
assert(_mesa_get_format_bytes(mt->format) % bw == 0);
|
||||
mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
|
||||
|
||||
mt->cpp = _mesa_get_format_bytes(format);
|
||||
mt->num_samples = num_samples;
|
||||
mt->compressed = _mesa_is_format_compressed(format);
|
||||
mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
|
||||
|
|
@ -1272,8 +1264,8 @@ intel_miptree_copy_slice(struct brw_context *brw,
|
|||
if (dst_mt->compressed) {
|
||||
unsigned int i, j;
|
||||
_mesa_get_format_block_size(dst_mt->format, &i, &j);
|
||||
height = ALIGN(height, j) / j;
|
||||
width = ALIGN(width, i);
|
||||
height = ALIGN_NPOT(height, j) / j;
|
||||
width = ALIGN_NPOT(width, i) / i;
|
||||
}
|
||||
|
||||
/* If it's a packed depth/stencil buffer with separate stencil, the blit
|
||||
|
|
@ -2105,7 +2097,9 @@ intel_miptree_map_gtt(struct brw_context *brw,
|
|||
*/
|
||||
_mesa_get_format_block_size(mt->format, &bw, &bh);
|
||||
assert(y % bh == 0);
|
||||
assert(x % bw == 0);
|
||||
y /= bh;
|
||||
x /= bw;
|
||||
|
||||
base = intel_miptree_map_raw(brw, mt) + mt->offset;
|
||||
|
||||
|
|
|
|||
|
|
@ -390,7 +390,7 @@ struct intel_mipmap_tree
|
|||
*/
|
||||
GLuint physical_width0, physical_height0, physical_depth0;
|
||||
|
||||
GLuint cpp; /**< bytes per pixel */
|
||||
GLuint cpp; /**< bytes per pixel (or bytes per block if compressed) */
|
||||
GLuint num_samples;
|
||||
bool compressed;
|
||||
|
||||
|
|
|
|||
|
|
@ -44,12 +44,6 @@
|
|||
|
||||
#define INTEL_UPLOAD_SIZE (64*1024)
|
||||
|
||||
/**
|
||||
* Like ALIGN(), but works with a non-power-of-two alignment.
|
||||
*/
|
||||
#define ALIGN_NPOT(value, alignment) \
|
||||
(((value) + (alignment) - 1) / (alignment) * (alignment))
|
||||
|
||||
void
|
||||
intel_upload_finish(struct brw_context *brw)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -336,12 +336,15 @@ static const struct extension extension_table[] = {
|
|||
{ "GL_OES_texture_half_float", o(OES_texture_half_float), ES2, 2005 },
|
||||
{ "GL_OES_texture_half_float_linear", o(OES_texture_half_float_linear), ES2, 2005 },
|
||||
{ "GL_OES_texture_mirrored_repeat", o(dummy_true), ES1, 2005 },
|
||||
{ "GL_OES_texture_storage_multisample_2d_array",o(ARB_texture_multisample), ES31, 2014 },
|
||||
{ "GL_OES_texture_npot", o(ARB_texture_non_power_of_two), ES1 | ES2, 2005 },
|
||||
{ "GL_OES_vertex_array_object", o(dummy_true), ES1 | ES2, 2010 },
|
||||
|
||||
/* KHR extensions */
|
||||
{ "GL_KHR_debug", o(dummy_true), GL, 2012 },
|
||||
{ "GL_KHR_context_flush_control", o(dummy_true), GL | ES2, 2014 },
|
||||
{ "GL_KHR_texture_compression_astc_hdr", o(KHR_texture_compression_astc_hdr), GL | ES2, 2012 },
|
||||
{ "GL_KHR_texture_compression_astc_ldr", o(KHR_texture_compression_astc_ldr), GL | ES2, 2012 },
|
||||
|
||||
/* Vendor extensions */
|
||||
{ "GL_3DFX_texture_compression_FXT1", o(TDFX_texture_compression_FXT1), GL, 1999 },
|
||||
|
|
|
|||
|
|
@ -122,6 +122,9 @@ def get_channel_bits(fmat, chan_name):
|
|||
elif fmat.layout == 'bptc':
|
||||
bits = 16 if fmat.name.endswith('_FLOAT') else 8
|
||||
return bits if fmat.has_channel(chan_name) else 0
|
||||
elif fmat.layout == 'astc':
|
||||
bits = 16 if 'RGBA' in fmat.name else 8
|
||||
return bits if fmat.has_channel(chan_name) else 0
|
||||
else:
|
||||
assert False
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -197,6 +197,7 @@ _mesa_get_format_max_bits(mesa_format format)
|
|||
* MESA_FORMAT_LAYOUT_ETC1
|
||||
* MESA_FORMAT_LAYOUT_ETC2
|
||||
* MESA_FORMAT_LAYOUT_BPTC
|
||||
* MESA_FORMAT_LAYOUT_ASTC
|
||||
* MESA_FORMAT_LAYOUT_OTHER
|
||||
*/
|
||||
extern enum mesa_format_layout
|
||||
|
|
@ -663,6 +664,48 @@ _mesa_get_srgb_format_linear(mesa_format format)
|
|||
case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM:
|
||||
format = MESA_FORMAT_BPTC_RGBA_UNORM;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4:
|
||||
format = MESA_FORMAT_RGBA_ASTC_4x4;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4:
|
||||
format = MESA_FORMAT_RGBA_ASTC_5x4;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5:
|
||||
format = MESA_FORMAT_RGBA_ASTC_5x5;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5:
|
||||
format = MESA_FORMAT_RGBA_ASTC_6x5;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6:
|
||||
format = MESA_FORMAT_RGBA_ASTC_6x6;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5:
|
||||
format = MESA_FORMAT_RGBA_ASTC_8x5;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6:
|
||||
format = MESA_FORMAT_RGBA_ASTC_8x6;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8:
|
||||
format = MESA_FORMAT_RGBA_ASTC_8x8;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5:
|
||||
format = MESA_FORMAT_RGBA_ASTC_10x5;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6:
|
||||
format = MESA_FORMAT_RGBA_ASTC_10x6;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8:
|
||||
format = MESA_FORMAT_RGBA_ASTC_10x8;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10:
|
||||
format = MESA_FORMAT_RGBA_ASTC_10x10;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10:
|
||||
format = MESA_FORMAT_RGBA_ASTC_12x10;
|
||||
break;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12:
|
||||
format = MESA_FORMAT_RGBA_ASTC_12x12;
|
||||
break;
|
||||
case MESA_FORMAT_B8G8R8X8_SRGB:
|
||||
format = MESA_FORMAT_B8G8R8X8_UNORM;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -301,3 +301,34 @@ MESA_FORMAT_BPTC_RGBA_UNORM , bptc , 4, 4, x128, , ,
|
|||
MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM , bptc , 4, 4, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT , bptc , 4, 4, x128, , , , xyz1, rgb
|
||||
MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT , bptc , 4, 4, x128, , , , xyz1, rgb
|
||||
|
||||
# ASTC compressed formats
|
||||
MESA_FORMAT_RGBA_ASTC_4x4 , astc , 4, 4, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_5x4 , astc , 5, 4, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_5x5 , astc , 5, 5, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_6x5 , astc , 6, 5, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_6x6 , astc , 6, 6, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_8x5 , astc , 8, 5, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_8x6 , astc , 8, 6, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_8x8 , astc , 8, 8, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_10x5 , astc ,10, 5, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_10x6 , astc ,10, 6, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_10x8 , astc ,10, 8, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_10x10 , astc ,10,10, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_12x10 , astc ,12,10, x128, , , , xyzw, rgb
|
||||
MESA_FORMAT_RGBA_ASTC_12x12 , astc ,12,12, x128, , , , xyzw, rgb
|
||||
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4 , astc , 4, 4, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4 , astc , 5, 4, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5 , astc , 5, 5, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5 , astc , 6, 5, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6 , astc , 6, 6, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5 , astc , 8, 5, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6 , astc , 8, 6, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8 , astc , 8, 8, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5 , astc ,10, 5, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6 , astc ,10, 6, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8 , astc ,10, 8, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10 , astc ,10,10, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10 , astc ,12,10, x128, , , , xyzw, srgb
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12 , astc ,12,12, x128, , , , xyzw, srgb
|
||||
|
|
|
|||
|
Can't render this file because it contains an unexpected character in line 9 and column 3.
|
|
|
@ -70,6 +70,7 @@ enum mesa_format_layout {
|
|||
MESA_FORMAT_LAYOUT_ETC1,
|
||||
MESA_FORMAT_LAYOUT_ETC2,
|
||||
MESA_FORMAT_LAYOUT_BPTC,
|
||||
MESA_FORMAT_LAYOUT_ASTC,
|
||||
MESA_FORMAT_LAYOUT_OTHER,
|
||||
};
|
||||
|
||||
|
|
@ -586,6 +587,36 @@ typedef enum
|
|||
MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT,
|
||||
MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT,
|
||||
|
||||
/* ASTC compressed formats */
|
||||
MESA_FORMAT_RGBA_ASTC_4x4,
|
||||
MESA_FORMAT_RGBA_ASTC_5x4,
|
||||
MESA_FORMAT_RGBA_ASTC_5x5,
|
||||
MESA_FORMAT_RGBA_ASTC_6x5,
|
||||
MESA_FORMAT_RGBA_ASTC_6x6,
|
||||
MESA_FORMAT_RGBA_ASTC_8x5,
|
||||
MESA_FORMAT_RGBA_ASTC_8x6,
|
||||
MESA_FORMAT_RGBA_ASTC_8x8,
|
||||
MESA_FORMAT_RGBA_ASTC_10x5,
|
||||
MESA_FORMAT_RGBA_ASTC_10x6,
|
||||
MESA_FORMAT_RGBA_ASTC_10x8,
|
||||
MESA_FORMAT_RGBA_ASTC_10x10,
|
||||
MESA_FORMAT_RGBA_ASTC_12x10,
|
||||
MESA_FORMAT_RGBA_ASTC_12x12,
|
||||
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10,
|
||||
MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12,
|
||||
MESA_FORMAT_COUNT
|
||||
} mesa_format;
|
||||
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx,
|
|||
|
||||
if (_mesa_is_enum_format_integer(srcImage->InternalFormat) ||
|
||||
_mesa_is_depthstencil_format(srcImage->InternalFormat) ||
|
||||
_mesa_is_astc_format(srcImage->InternalFormat) ||
|
||||
_mesa_is_stencil_format(srcImage->InternalFormat)) {
|
||||
_mesa_unlock_texture(ctx, texObj);
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION,
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@
|
|||
#include "mtypes.h"
|
||||
#include "state.h"
|
||||
#include "texcompress.h"
|
||||
#include "texstate.h"
|
||||
#include "framebuffer.h"
|
||||
#include "samplerobj.h"
|
||||
#include "stencil.h"
|
||||
|
|
@ -993,16 +994,7 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
|
|||
{
|
||||
struct gl_sampler_object *samp =
|
||||
ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler;
|
||||
|
||||
/*
|
||||
* The sampler object may have been deleted on another context,
|
||||
* so we try to lookup the sampler object before returning its Name.
|
||||
*/
|
||||
if (samp && _mesa_lookup_samplerobj(ctx, samp->Name)) {
|
||||
v->value_int = samp->Name;
|
||||
} else {
|
||||
v->value_int = 0;
|
||||
}
|
||||
v->value_int = samp ? samp->Name : 0;
|
||||
}
|
||||
break;
|
||||
/* GL_ARB_uniform_buffer_object */
|
||||
|
|
@ -1750,6 +1742,52 @@ _mesa_GetDoublev(GLenum pname, GLdouble *params)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a GL texture binding enum such as GL_TEXTURE_BINDING_2D
|
||||
* into the corresponding Mesa texture target index.
|
||||
* \return TEXTURE_x_INDEX or -1 if binding is invalid
|
||||
*/
|
||||
static int
|
||||
tex_binding_to_index(const struct gl_context *ctx, GLenum binding)
|
||||
{
|
||||
switch (binding) {
|
||||
case GL_TEXTURE_BINDING_1D:
|
||||
return _mesa_is_desktop_gl(ctx) ? TEXTURE_1D_INDEX : -1;
|
||||
case GL_TEXTURE_BINDING_2D:
|
||||
return TEXTURE_2D_INDEX;
|
||||
case GL_TEXTURE_BINDING_3D:
|
||||
return ctx->API != API_OPENGLES ? TEXTURE_3D_INDEX : -1;
|
||||
case GL_TEXTURE_BINDING_CUBE_MAP:
|
||||
return ctx->Extensions.ARB_texture_cube_map
|
||||
? TEXTURE_CUBE_INDEX : -1;
|
||||
case GL_TEXTURE_BINDING_RECTANGLE:
|
||||
return _mesa_is_desktop_gl(ctx) && ctx->Extensions.NV_texture_rectangle
|
||||
? TEXTURE_RECT_INDEX : -1;
|
||||
case GL_TEXTURE_BINDING_1D_ARRAY:
|
||||
return _mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array
|
||||
? TEXTURE_1D_ARRAY_INDEX : -1;
|
||||
case GL_TEXTURE_BINDING_2D_ARRAY:
|
||||
return (_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array)
|
||||
|| _mesa_is_gles3(ctx)
|
||||
? TEXTURE_2D_ARRAY_INDEX : -1;
|
||||
case GL_TEXTURE_BINDING_BUFFER:
|
||||
return ctx->API == API_OPENGL_CORE &&
|
||||
ctx->Extensions.ARB_texture_buffer_object ?
|
||||
TEXTURE_BUFFER_INDEX : -1;
|
||||
case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY:
|
||||
return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_cube_map_array
|
||||
? TEXTURE_CUBE_ARRAY_INDEX : -1;
|
||||
case GL_TEXTURE_BINDING_2D_MULTISAMPLE:
|
||||
return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
|
||||
? TEXTURE_2D_MULTISAMPLE_INDEX : -1;
|
||||
case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY:
|
||||
return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
|
||||
? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : -1;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
static enum value_type
|
||||
find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
|
||||
{
|
||||
|
|
@ -2013,6 +2051,45 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
|
|||
v->value_int = ctx->ImageUnits[index].Format;
|
||||
return TYPE_INT;
|
||||
|
||||
/* ARB_direct_state_access */
|
||||
case GL_TEXTURE_BINDING_1D:
|
||||
case GL_TEXTURE_BINDING_1D_ARRAY:
|
||||
case GL_TEXTURE_BINDING_2D:
|
||||
case GL_TEXTURE_BINDING_2D_ARRAY:
|
||||
case GL_TEXTURE_BINDING_2D_MULTISAMPLE:
|
||||
case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY:
|
||||
case GL_TEXTURE_BINDING_3D:
|
||||
case GL_TEXTURE_BINDING_BUFFER:
|
||||
case GL_TEXTURE_BINDING_CUBE_MAP:
|
||||
case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY:
|
||||
case GL_TEXTURE_BINDING_RECTANGLE: {
|
||||
int target;
|
||||
|
||||
if (ctx->API != API_OPENGL_CORE)
|
||||
goto invalid_enum;
|
||||
target = tex_binding_to_index(ctx, pname);
|
||||
if (target < 0)
|
||||
goto invalid_enum;
|
||||
if (index >= _mesa_max_tex_unit(ctx))
|
||||
goto invalid_value;
|
||||
|
||||
v->value_int = ctx->Texture.Unit[index].CurrentTex[target]->Name;
|
||||
return TYPE_INT;
|
||||
}
|
||||
|
||||
case GL_SAMPLER_BINDING: {
|
||||
struct gl_sampler_object *samp;
|
||||
|
||||
if (ctx->API != API_OPENGL_CORE)
|
||||
goto invalid_enum;
|
||||
if (index >= _mesa_max_tex_unit(ctx))
|
||||
goto invalid_value;
|
||||
|
||||
samp = ctx->Texture.Unit[index].Sampler;
|
||||
v->value_int = samp ? samp->Name : 0;
|
||||
return TYPE_INT;
|
||||
}
|
||||
|
||||
case GL_MAX_COMPUTE_WORK_GROUP_COUNT:
|
||||
if (!_mesa_has_compute_shaders(ctx))
|
||||
goto invalid_enum;
|
||||
|
|
|
|||
|
|
@ -434,6 +434,9 @@ descriptor=[
|
|||
[ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample" ],
|
||||
[ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample" ],
|
||||
|
||||
# GL_ARB_texture_multisample / ES 3.1 with GL_OES_texture_storage_multisample_2d_array
|
||||
[ "TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX, extra_ARB_texture_multisample" ],
|
||||
|
||||
# GL_ARB_texture_gather / GLES 3.1
|
||||
[ "MIN_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MinProgramTextureGatherOffset), extra_ARB_texture_gather"],
|
||||
[ "MAX_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MaxProgramTextureGatherOffset), extra_ARB_texture_gather"],
|
||||
|
|
@ -740,9 +743,6 @@ descriptor=[
|
|||
[ "TEXTURE_BUFFER_FORMAT_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ],
|
||||
[ "TEXTURE_BUFFER_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ],
|
||||
|
||||
# GL_ARB_texture_multisample / GL 3.2
|
||||
[ "TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX, extra_ARB_texture_multisample" ],
|
||||
|
||||
# GL 3.0
|
||||
[ "CONTEXT_FLAGS", "CONTEXT_INT(Const.ContextFlags), extra_version_30" ],
|
||||
|
||||
|
|
|
|||
|
|
@ -820,6 +820,47 @@ _mesa_is_enum_format_signed_int(GLenum format)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test if the given format is an ASTC format.
|
||||
*/
|
||||
GLboolean
|
||||
_mesa_is_astc_format(GLenum internalFormat)
|
||||
{
|
||||
switch (internalFormat) {
|
||||
case GL_COMPRESSED_RGBA_ASTC_4x4_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_5x4_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_5x5_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_6x5_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_6x6_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_8x5_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_8x6_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_8x8_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x5_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x6_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x8_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x10_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_12x10_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_12x12_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test if the given format is an integer (non-normalized) format.
|
||||
|
|
@ -1262,6 +1303,35 @@ _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format)
|
|||
case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
|
||||
return _mesa_is_desktop_gl(ctx) &&
|
||||
ctx->Extensions.ARB_texture_compression_bptc;
|
||||
case GL_COMPRESSED_RGBA_ASTC_4x4_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_5x4_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_5x5_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_6x5_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_6x6_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_8x5_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_8x6_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_8x8_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x5_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x6_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x8_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x10_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_12x10_KHR:
|
||||
case GL_COMPRESSED_RGBA_ASTC_12x12_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR:
|
||||
return ctx->Extensions.KHR_texture_compression_astc_ldr;
|
||||
case GL_PALETTE4_RGB8_OES:
|
||||
case GL_PALETTE4_RGBA8_OES:
|
||||
case GL_PALETTE4_R5_G6_B5_OES:
|
||||
|
|
|
|||
|
|
@ -56,6 +56,9 @@ _mesa_bytes_per_pixel( GLenum format, GLenum type );
|
|||
extern GLint
|
||||
_mesa_bytes_per_vertex_attrib(GLint comps, GLenum type);
|
||||
|
||||
extern GLboolean
|
||||
_mesa_is_astc_format(GLenum internalFormat);
|
||||
|
||||
extern GLboolean
|
||||
_mesa_is_type_unsigned(GLenum type);
|
||||
|
||||
|
|
|
|||
|
|
@ -690,7 +690,22 @@ minify(unsigned value, unsigned levels)
|
|||
*
|
||||
* \sa ROUND_DOWN_TO()
|
||||
*/
|
||||
#define ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))
|
||||
static inline uintptr_t
ALIGN(uintptr_t value, int32_t alignment)
{
   /* Low bits that must be clear in an aligned result. */
   const uintptr_t mask = (uintptr_t)alignment - 1;

   /* Only positive power-of-two alignments are supported here. */
   assert((alignment > 0) && _mesa_is_pow_two(alignment));

   /* Bump past the next boundary, then drop the low bits. */
   return (value + mask) & ~mask;
}
|
||||
|
||||
/**
 * Like ALIGN(), but works with a non-power-of-two alignment.
 *
 * Rounds \p value up to the next multiple of \p alignment. A value that is
 * already a multiple is returned unchanged, including values close to
 * UINTPTR_MAX where adding (alignment - 1) first would wrap around.
 *
 * \param value      value to round up
 * \param alignment  positive alignment; need not be a power of two
 * \return the smallest multiple of \p alignment that is >= \p value
 */
static inline uintptr_t
ALIGN_NPOT(uintptr_t value, int32_t alignment)
{
   uintptr_t remainder;

   assert(alignment > 0);

   /* Work from the remainder so an already-aligned value never overflows. */
   remainder = value % (uintptr_t)alignment;
   return remainder ? value + ((uintptr_t)alignment - remainder) : value;
}
|
||||
|
||||
/**
|
||||
* Align a value down to an alignment value
|
||||
|
|
@ -703,7 +718,12 @@ minify(unsigned value, unsigned levels)
|
|||
*
|
||||
* \sa ALIGN()
|
||||
*/
|
||||
#define ROUND_DOWN_TO(value, alignment) ((value) & ~(alignment - 1))
|
||||
static inline uintptr_t
ROUND_DOWN_TO(uintptr_t value, int32_t alignment)
{
   /* Low bits to clear so the result lands on an alignment boundary. */
   const uintptr_t low_bits = (uintptr_t)alignment - 1;

   /* Only positive power-of-two alignments are supported here. */
   assert((alignment > 0) && _mesa_is_pow_two(alignment));

   return value & ~low_bits;
}
|
||||
|
||||
|
||||
/** Cross product of two 3-element vectors */
|
||||
|
|
|
|||
|
|
@ -3751,6 +3751,8 @@ struct gl_extensions
|
|||
GLboolean ATI_fragment_shader;
|
||||
GLboolean ATI_separate_stencil;
|
||||
GLboolean INTEL_performance_query;
|
||||
GLboolean KHR_texture_compression_astc_hdr;
|
||||
GLboolean KHR_texture_compression_astc_ldr;
|
||||
GLboolean MESA_pack_invert;
|
||||
GLboolean MESA_ycbcr_texture;
|
||||
GLboolean NV_conditional_render;
|
||||
|
|
|
|||
|
|
@ -2480,5 +2480,8 @@ const struct function gles31_functions_possible[] = {
|
|||
{ "glVertexAttribBinding", 31, -1 },
|
||||
{ "glVertexBindingDivisor", 31, -1 },
|
||||
|
||||
/* GL_OES_texture_storage_multisample_2d_array */
|
||||
{ "glTexStorage3DMultisampleOES", 31, -1 },
|
||||
|
||||
{ NULL, 0, -1 },
|
||||
};
|
||||
|
|
|
|||
|
|
@ -229,6 +229,28 @@ _mesa_gl_compressed_format_base_format(GLenum format)
|
|||
* what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
|
||||
* GL_COMPRESSED_TEXTURE_FORMATS return."
|
||||
*
|
||||
* The KHR_texture_compression_astc_hdr spec says:
|
||||
*
|
||||
* "Interactions with OpenGL 4.2
|
||||
*
|
||||
* OpenGL 4.2 supports the feature that compressed textures can be
|
||||
* compressed online, by passing the compressed texture format enum as
|
||||
* the internal format when uploading a texture using TexImage1D,
|
||||
* TexImage2D or TexImage3D (see Section 3.9.3, Texture Image
|
||||
* Specification, subsection Encoding of Special Internal Formats).
|
||||
*
|
||||
* Due to the complexity of the ASTC compression algorithm, it is not
|
||||
* usually suitable for online use, and therefore ASTC support will be
|
||||
* limited to pre-compressed textures only. Where on-device compression
|
||||
* is required, a domain-specific limited compressor will typically
|
||||
* be used, and this is therefore not suitable for implementation in
|
||||
* the driver.
|
||||
*
|
||||
* In particular, the ASTC format specifiers will not be added to
|
||||
* Table 3.14, and thus will not be accepted by the TexImage*D
|
||||
* functions, and will not be returned by the (already deprecated)
|
||||
* COMPRESSED_TEXTURE_FORMATS query."
|
||||
*
|
||||
* There is no formal spec for GL_ATI_texture_compression_3dc. Since the
|
||||
* formats added by this extension are luminance-alpha formats, it is
|
||||
* reasonable to expect them to follow the same rules as
|
||||
|
|
@ -378,15 +400,15 @@ _mesa_glenum_to_compressed_format(GLenum format)
|
|||
|
||||
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
|
||||
case GL_RGB_S3TC:
|
||||
case GL_RGB4_S3TC:
|
||||
return MESA_FORMAT_RGB_DXT1;
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
|
||||
case GL_RGB4_S3TC:
|
||||
return MESA_FORMAT_RGBA_DXT1;
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
|
||||
case GL_RGBA_S3TC:
|
||||
case GL_RGBA4_S3TC:
|
||||
return MESA_FORMAT_RGBA_DXT3;
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
|
||||
case GL_RGBA4_S3TC:
|
||||
return MESA_FORMAT_RGBA_DXT5;
|
||||
|
||||
case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
|
||||
|
|
@ -449,6 +471,63 @@ _mesa_glenum_to_compressed_format(GLenum format)
|
|||
case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
|
||||
return MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT;
|
||||
|
||||
case GL_COMPRESSED_RGBA_ASTC_4x4_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_4x4;
|
||||
case GL_COMPRESSED_RGBA_ASTC_5x4_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_5x4;
|
||||
case GL_COMPRESSED_RGBA_ASTC_5x5_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_5x5;
|
||||
case GL_COMPRESSED_RGBA_ASTC_6x5_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_6x5;
|
||||
case GL_COMPRESSED_RGBA_ASTC_6x6_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_6x6;
|
||||
case GL_COMPRESSED_RGBA_ASTC_8x5_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_8x5;
|
||||
case GL_COMPRESSED_RGBA_ASTC_8x6_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_8x6;
|
||||
case GL_COMPRESSED_RGBA_ASTC_8x8_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_8x8;
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x5_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_10x5;
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x6_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_10x6;
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x8_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_10x8;
|
||||
case GL_COMPRESSED_RGBA_ASTC_10x10_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_10x10;
|
||||
case GL_COMPRESSED_RGBA_ASTC_12x10_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_12x10;
|
||||
case GL_COMPRESSED_RGBA_ASTC_12x12_KHR:
|
||||
return MESA_FORMAT_RGBA_ASTC_12x12;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR:
|
||||
return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12;
|
||||
|
||||
default:
|
||||
return MESA_FORMAT_NONE;
|
||||
}
|
||||
|
|
@ -539,6 +618,63 @@ _mesa_compressed_format_to_glenum(struct gl_context *ctx, mesa_format mesaFormat
|
|||
case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT:
|
||||
return GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT;
|
||||
|
||||
case MESA_FORMAT_RGBA_ASTC_4x4:
|
||||
return GL_COMPRESSED_RGBA_ASTC_4x4_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_5x4:
|
||||
return GL_COMPRESSED_RGBA_ASTC_5x4_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_5x5:
|
||||
return GL_COMPRESSED_RGBA_ASTC_5x5_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_6x5:
|
||||
return GL_COMPRESSED_RGBA_ASTC_6x5_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_6x6:
|
||||
return GL_COMPRESSED_RGBA_ASTC_6x6_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_8x5:
|
||||
return GL_COMPRESSED_RGBA_ASTC_8x5_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_8x6:
|
||||
return GL_COMPRESSED_RGBA_ASTC_8x6_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_8x8:
|
||||
return GL_COMPRESSED_RGBA_ASTC_8x8_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_10x5:
|
||||
return GL_COMPRESSED_RGBA_ASTC_10x5_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_10x6:
|
||||
return GL_COMPRESSED_RGBA_ASTC_10x6_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_10x8:
|
||||
return GL_COMPRESSED_RGBA_ASTC_10x8_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_10x10:
|
||||
return GL_COMPRESSED_RGBA_ASTC_10x10_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_12x10:
|
||||
return GL_COMPRESSED_RGBA_ASTC_12x10_KHR;
|
||||
case MESA_FORMAT_RGBA_ASTC_12x12:
|
||||
return GL_COMPRESSED_RGBA_ASTC_12x12_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR;
|
||||
case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12:
|
||||
return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR;
|
||||
|
||||
default:
|
||||
_mesa_problem(ctx, "Unexpected mesa texture format in"
|
||||
" _mesa_compressed_format_to_glenum()");
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@
|
|||
#include "mtypes.h"
|
||||
#include "texcompress.h"
|
||||
#include "texformat.h"
|
||||
#include "glformats.h"
|
||||
|
||||
#define RETURN_IF_SUPPORTED(f) do { \
|
||||
if (ctx->TextureFormatSupported[f]) \
|
||||
|
|
@ -276,87 +277,6 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target,
|
|||
RETURN_IF_SUPPORTED(MESA_FORMAT_YCBCR_REV);
|
||||
break;
|
||||
|
||||
/* For non-generic compressed format we assert two things:
|
||||
*
|
||||
* 1. The format has already been validated against the set of available
|
||||
* extensions.
|
||||
*
|
||||
* 2. The driver only enables the extension if it supports all of the
|
||||
* formats that are part of that extension.
|
||||
*/
|
||||
case GL_COMPRESSED_RGB_FXT1_3DFX:
|
||||
return MESA_FORMAT_RGB_FXT1;
|
||||
case GL_COMPRESSED_RGBA_FXT1_3DFX:
|
||||
return MESA_FORMAT_RGBA_FXT1;
|
||||
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
|
||||
case GL_RGB_S3TC:
|
||||
case GL_RGB4_S3TC:
|
||||
return MESA_FORMAT_RGB_DXT1;
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
|
||||
return MESA_FORMAT_RGBA_DXT1;
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
|
||||
case GL_RGBA_S3TC:
|
||||
case GL_RGBA4_S3TC:
|
||||
return MESA_FORMAT_RGBA_DXT3;
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
|
||||
return MESA_FORMAT_RGBA_DXT5;
|
||||
case GL_COMPRESSED_RED_RGTC1:
|
||||
return MESA_FORMAT_R_RGTC1_UNORM;
|
||||
case GL_COMPRESSED_SIGNED_RED_RGTC1:
|
||||
return MESA_FORMAT_R_RGTC1_SNORM;
|
||||
case GL_COMPRESSED_RG_RGTC2:
|
||||
return MESA_FORMAT_RG_RGTC2_UNORM;
|
||||
case GL_COMPRESSED_SIGNED_RG_RGTC2:
|
||||
return MESA_FORMAT_RG_RGTC2_SNORM;
|
||||
case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
|
||||
return MESA_FORMAT_L_LATC1_UNORM;
|
||||
case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
|
||||
return MESA_FORMAT_L_LATC1_SNORM;
|
||||
case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
|
||||
return MESA_FORMAT_LA_LATC2_UNORM;
|
||||
case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
|
||||
return MESA_FORMAT_LA_LATC2_SNORM;
|
||||
case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
|
||||
return MESA_FORMAT_LA_LATC2_UNORM;
|
||||
case GL_ETC1_RGB8_OES:
|
||||
return MESA_FORMAT_ETC1_RGB8;
|
||||
case GL_COMPRESSED_RGB8_ETC2:
|
||||
return MESA_FORMAT_ETC2_RGB8;
|
||||
case GL_COMPRESSED_SRGB8_ETC2:
|
||||
return MESA_FORMAT_ETC2_SRGB8;
|
||||
case GL_COMPRESSED_RGBA8_ETC2_EAC:
|
||||
return MESA_FORMAT_ETC2_RGBA8_EAC;
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
|
||||
return MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC;
|
||||
case GL_COMPRESSED_R11_EAC:
|
||||
return MESA_FORMAT_ETC2_R11_EAC;
|
||||
case GL_COMPRESSED_RG11_EAC:
|
||||
return MESA_FORMAT_ETC2_RG11_EAC;
|
||||
case GL_COMPRESSED_SIGNED_R11_EAC:
|
||||
return MESA_FORMAT_ETC2_SIGNED_R11_EAC;
|
||||
case GL_COMPRESSED_SIGNED_RG11_EAC:
|
||||
return MESA_FORMAT_ETC2_SIGNED_RG11_EAC;
|
||||
case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
||||
return MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1;
|
||||
case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
||||
return MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1;
|
||||
case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
|
||||
return MESA_FORMAT_SRGB_DXT1;
|
||||
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
|
||||
return MESA_FORMAT_SRGBA_DXT1;
|
||||
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
|
||||
return MESA_FORMAT_SRGBA_DXT3;
|
||||
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
|
||||
return MESA_FORMAT_SRGBA_DXT5;
|
||||
case GL_COMPRESSED_RGBA_BPTC_UNORM:
|
||||
return MESA_FORMAT_BPTC_RGBA_UNORM;
|
||||
case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
|
||||
return MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM;
|
||||
case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
|
||||
return MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT;
|
||||
case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
|
||||
return MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT;
|
||||
|
||||
case GL_ALPHA16F_ARB:
|
||||
RETURN_IF_SUPPORTED(MESA_FORMAT_A_FLOAT16);
|
||||
RETURN_IF_SUPPORTED(MESA_FORMAT_A_FLOAT32);
|
||||
|
|
@ -844,6 +764,18 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target,
|
|||
case GL_BGRA:
|
||||
RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8A8_UNORM);
|
||||
break;
|
||||
|
||||
default:
|
||||
/* For non-generic compressed format we assert two things:
|
||||
*
|
||||
* 1. The format has already been validated against the set of available
|
||||
* extensions.
|
||||
*
|
||||
* 2. The driver only enables the extension if it supports all of the
|
||||
* formats that are part of that extension.
|
||||
*/
|
||||
if (_mesa_is_compressed_format(ctx, internalFormat))
|
||||
return _mesa_glenum_to_compressed_format(internalFormat);
|
||||
}
|
||||
|
||||
_mesa_problem(ctx, "unexpected format %s in _mesa_choose_tex_format()",
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue