mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-24 10:20:22 +01:00
freedreno/ir3: add support for ucp
Use the nir_lower_clip pass to add the VS/FS instructions that handle user-clip-planes and CLIPDIST. Wire up support for the load_user_clip_plane intrinsic to fetch ucp[plane] values as driver-params (passed as consts to the shader). Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
parent
509e0c4505
commit
91ec210ea8
4 changed files with 80 additions and 13 deletions
|
|
@ -94,6 +94,7 @@ static void print_usage(void)
|
|||
printf(" --saturate-t MASK - bitmask of samplers to saturate T coord\n");
|
||||
printf(" --saturate-r MASK - bitmask of samplers to saturate R coord\n");
|
||||
printf(" --stream-out - enable stream-out (aka transform feedback)\n");
|
||||
printf(" --ucp MASK - bitmask of enabled user-clip-planes\n");
|
||||
printf(" --help - show this message\n");
|
||||
}
|
||||
|
||||
|
|
@ -190,6 +191,13 @@ int main(int argc, char **argv)
|
|||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[n], "--ucp")) {
|
||||
debug_printf(" %s %s", argv[n], argv[n+1]);
|
||||
key.ucp_enables = strtol(argv[n+1], NULL, 0);
|
||||
n += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[n], "--help")) {
|
||||
print_usage();
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -127,7 +127,8 @@ struct ir3_compile {
|
|||
static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
|
||||
static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
|
||||
|
||||
static struct nir_shader *to_nir(const struct tgsi_token *tokens)
|
||||
static struct nir_shader *to_nir(const struct tgsi_token *tokens,
|
||||
struct ir3_shader_variant *so)
|
||||
{
|
||||
struct nir_shader_compiler_options options = {
|
||||
.lower_fpow = true,
|
||||
|
|
@ -149,6 +150,11 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
|
|||
|
||||
nir_opt_global_to_local(s);
|
||||
nir_convert_to_ssa(s);
|
||||
if (s->stage == MESA_SHADER_VERTEX) {
|
||||
nir_lower_clip_vs(s, so->key.ucp_enables);
|
||||
} else if (s->stage == MESA_SHADER_FRAGMENT) {
|
||||
nir_lower_clip_fs(s, so->key.ucp_enables);
|
||||
}
|
||||
nir_lower_idiv(s);
|
||||
nir_lower_load_const_to_scalar(s);
|
||||
|
||||
|
|
@ -251,7 +257,7 @@ compile_init(struct ir3_compiler *compiler,
|
|||
lowered_tokens = lower_tgsi(ctx, tokens, so);
|
||||
if (!lowered_tokens)
|
||||
lowered_tokens = tokens;
|
||||
ctx->s = to_nir(lowered_tokens);
|
||||
ctx->s = to_nir(lowered_tokens, so);
|
||||
|
||||
if (lowered_tokens != tokens)
|
||||
free((void *)lowered_tokens);
|
||||
|
|
@ -263,7 +269,7 @@ compile_init(struct ir3_compiler *compiler,
|
|||
* num_uniform * vec4 - user consts
|
||||
* 4 * vec4 - UBO addresses
|
||||
* if (vertex shader) {
|
||||
* 1 * vec4 - driver params (IR3_DP_*)
|
||||
* N * vec4 - driver params (IR3_DP_*)
|
||||
* 1 * vec4 - stream-out addresses
|
||||
* }
|
||||
*
|
||||
|
|
@ -275,8 +281,8 @@ compile_init(struct ir3_compiler *compiler,
|
|||
so->first_immediate += 4;
|
||||
|
||||
if (so->type == SHADER_VERTEX) {
|
||||
/* one (vec4) slot for driver params (see ir3_driver_param): */
|
||||
so->first_immediate++;
|
||||
/* driver params (see ir3_driver_param): */
|
||||
so->first_immediate += IR3_DP_COUNT/4; /* convert to vec4 */
|
||||
/* one (vec4) slot for stream-output base addresses: */
|
||||
so->first_immediate++;
|
||||
}
|
||||
|
|
@ -828,7 +834,9 @@ static struct ir3_instruction *
|
|||
create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp)
|
||||
{
|
||||
/* first four vec4 sysval's reserved for UBOs: */
|
||||
unsigned r = regid(ctx->so->first_driver_param + 4, dp);
|
||||
/* NOTE: dp is in scalar, but there can be >4 dp components: */
|
||||
unsigned n = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF;
|
||||
unsigned r = regid(n + dp / 4, dp % 4);
|
||||
return create_uniform(ctx, r);
|
||||
}
|
||||
|
||||
|
|
@ -1199,7 +1207,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
|
|||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *addr, *src0, *src1;
|
||||
/* UBO addresses are the first driver params: */
|
||||
unsigned ubo = regid(ctx->so->first_driver_param, 0);
|
||||
unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
|
||||
unsigned off = intr->const_index[0];
|
||||
|
||||
/* First src is ubo index, which could either be an immed or not: */
|
||||
|
|
@ -1459,6 +1467,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
}
|
||||
dst[0] = ctx->instance_id;
|
||||
break;
|
||||
case nir_intrinsic_load_user_clip_plane:
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
unsigned n = idx * 4 + i;
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_discard_if:
|
||||
case nir_intrinsic_discard: {
|
||||
struct ir3_instruction *cond, *kill;
|
||||
|
|
@ -2066,7 +2080,7 @@ emit_stream_out(struct ir3_compile *ctx)
|
|||
unsigned stride = strmout->stride[i];
|
||||
struct ir3_instruction *base, *off;
|
||||
|
||||
base = create_uniform(ctx, regid(v->first_driver_param + 5, i));
|
||||
base = create_uniform(ctx, regid(v->first_driver_param + IR3_TFBOS_OFF, i));
|
||||
|
||||
/* 24-bit should be enough: */
|
||||
off = ir3_MUL_U(ctx->block, vtxcnt, 0,
|
||||
|
|
@ -2250,6 +2264,8 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
|
|||
case VARYING_SLOT_BFC0:
|
||||
case VARYING_SLOT_BFC1:
|
||||
case VARYING_SLOT_FOGC:
|
||||
case VARYING_SLOT_CLIP_DIST0:
|
||||
case VARYING_SLOT_CLIP_DIST1:
|
||||
break;
|
||||
default:
|
||||
if (slot >= VARYING_SLOT_VAR0)
|
||||
|
|
|
|||
|
|
@ -501,7 +501,7 @@ static void
|
|||
emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
|
||||
struct fd_constbuf_stateobj *constbuf)
|
||||
{
|
||||
uint32_t offset = v->first_driver_param; /* UBOs after user consts */
|
||||
uint32_t offset = v->first_driver_param + IR3_UBOS_OFF;
|
||||
if (v->constlen > offset) {
|
||||
struct fd_context *ctx = fd_context(v->shader->pctx);
|
||||
uint32_t params = MIN2(4, v->constlen - offset) * 4;
|
||||
|
|
@ -554,7 +554,8 @@ emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
|
|||
static void
|
||||
emit_tfbos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
|
||||
{
|
||||
uint32_t offset = v->first_driver_param + 5; /* streamout addresses after driver-params*/
|
||||
/* streamout addresses after driver-params: */
|
||||
uint32_t offset = v->first_driver_param + IR3_TFBOS_OFF;
|
||||
if (v->constlen > offset) {
|
||||
struct fd_context *ctx = fd_context(v->shader->pctx);
|
||||
struct fd_streamout_stateobj *so = &ctx->streamout;
|
||||
|
|
@ -657,17 +658,33 @@ ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
|
|||
/* emit driver params every time: */
|
||||
/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
|
||||
if (info && (v->type == SHADER_VERTEX)) {
|
||||
uint32_t offset = v->first_driver_param + 4; /* driver params after UBOs */
|
||||
uint32_t offset = v->first_driver_param + IR3_DRIVER_PARAM_OFF;
|
||||
if (v->constlen >= offset) {
|
||||
uint32_t vertex_params[4] = {
|
||||
uint32_t vertex_params[IR3_DP_COUNT] = {
|
||||
[IR3_DP_VTXID_BASE] = info->indexed ?
|
||||
info->index_bias : info->start,
|
||||
[IR3_DP_VTXCNT_MAX] = max_tf_vtx(v),
|
||||
};
|
||||
/* if no user-clip-planes, we don't need to emit the
|
||||
* entire thing:
|
||||
*/
|
||||
uint32_t vertex_params_size = 4;
|
||||
|
||||
if (v->key.ucp_enables) {
|
||||
struct pipe_clip_state *ucp = &ctx->ucp;
|
||||
unsigned pos = IR3_DP_UCP0_X;
|
||||
for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
vertex_params[pos] = fui(ucp->ucp[i][j]);
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
vertex_params_size = ARRAY_SIZE(vertex_params);
|
||||
}
|
||||
|
||||
fd_wfi(ctx, ring);
|
||||
ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
|
||||
ARRAY_SIZE(vertex_params), vertex_params, NULL);
|
||||
vertex_params_size, vertex_params, NULL);
|
||||
|
||||
/* if needed, emit stream-out buffer addresses: */
|
||||
if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
|
||||
|
|
|
|||
|
|
@ -39,8 +39,29 @@
|
|||
/* Indices of the driver-supplied parameters that are passed to the shader
 * as constants.  Each value is a *scalar* index: the consumer converts it
 * to a vec4 register plus component as (dp / 4, dp % 4).
 */
enum ir3_driver_param {
|
||||
/* filled with info->index_bias for indexed draws, else info->start: */
IR3_DP_VTXID_BASE = 0,
|
||||
/* filled with max_tf_vtx(), also gates the stream-out address emit: */
IR3_DP_VTXCNT_MAX = 1,
|
||||
/* user-clip-plane components, up to 8x vec4's: */
/* (indices 2 and 3 are not assigned here, so UCP0 starts on a vec4
 * boundary)
 */
|
||||
IR3_DP_UCP0_X = 4,
|
||||
/* .... */
/* intermediate components are addressed arithmetically rather than by
 * name: component `c` of plane `p` is IR3_DP_UCP0_X + p * 4 + c
 */
|
||||
IR3_DP_UCP7_W = 35,
|
||||
/* total number of scalar slots; users divide by 4 to get a vec4 count */
IR3_DP_COUNT = 36 /* must be aligned to vec4 */
|
||||
};
|
||||
|
||||
/* Layout of constant registers:
|
||||
*
|
||||
* num_uniform * vec4 - user consts
|
||||
* 4 * vec4 - UBO addresses
|
||||
* if (vertex shader) {
|
||||
* N * vec4 - driver params (IR3_DP_*)
|
||||
* 1 * vec4 - stream-out addresses
|
||||
* }
|
||||
*
|
||||
* TODO this could be made more dynamic, to at least skip sections
|
||||
* that we don't need..
|
||||
*/
|
||||
#define IR3_UBOS_OFF 0 /* UBOs after user consts */
|
||||
#define IR3_DRIVER_PARAM_OFF 4 /* driver params after UBOs */
|
||||
#define IR3_TFBOS_OFF (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4)
|
||||
|
||||
/* Configuration key used to identify a shader variant.. different
|
||||
* shader variants can be used to implement features not supported
|
||||
* in hw (two sided color), binning-pass vertex shader, etc.
|
||||
|
|
@ -48,6 +69,11 @@ enum ir3_driver_param {
|
|||
struct ir3_shader_key {
|
||||
union {
|
||||
struct {
|
||||
/*
|
||||
* Combined Vertex/Fragment shader parameters:
|
||||
*/
|
||||
unsigned ucp_enables : 8;
|
||||
|
||||
/* do we need to check {v,f}saturate_{s,t,r}? */
|
||||
unsigned has_per_samp : 1;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue