mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 05:00:09 +01:00
freedreno/ir3: large const support
Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
parent
e71a3f80fb
commit
652b8fbbbb
5 changed files with 33 additions and 13 deletions
|
|
@ -186,6 +186,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
|
|||
enum a3xx_instrbuffermode fpbuffer, vpbuffer;
|
||||
uint32_t fpbuffersz, vpbuffersz, fsoff;
|
||||
uint32_t pos_regid, posz_regid, psize_regid, color_regid;
|
||||
int constmode;
|
||||
int i, j, k;
|
||||
|
||||
vp = fd3_emit_get_vp(emit);
|
||||
|
|
@ -241,6 +242,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
|
|||
fsoff = 256 - fpbuffersz;
|
||||
}
|
||||
|
||||
/* it seems that if vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
|
||||
constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
|
||||
|
||||
pos_regid = find_output_regid(vp,
|
||||
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
|
||||
posz_regid = find_output_regid(fp,
|
||||
|
|
@ -256,6 +260,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
|
|||
|
||||
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
|
||||
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
|
||||
A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
|
||||
/* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
|
||||
* flush some caches? I think we only need to set those
|
||||
* bits if we have updated const or shader..
|
||||
|
|
@ -275,7 +280,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
|
|||
A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
|
||||
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
|
||||
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
|
||||
COND(emit->key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
|
||||
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
|
||||
A3XX_SP_SP_CTRL_REG_L0MODE(0));
|
||||
|
|
@ -381,11 +386,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
|
|||
A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) |
|
||||
A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
|
||||
|
||||
/* NOTE: I believe VS.CONSTLEN should be <= FS.CONSTOBJOFFSET*/
|
||||
debug_assert(vp->constlen <= 128);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
|
||||
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
|
||||
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
|
||||
MAX2(128, vp->constlen)) |
|
||||
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
|
||||
OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
|
||||
}
|
||||
|
|
|
|||
|
|
@ -354,7 +354,11 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_MAX_TEMPS:
|
||||
return 64; /* Max native temporaries. */
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
|
||||
return ((screen->gpu_id >= 300) ? 1024 : 64) * sizeof(float[4]);
|
||||
/* NOTE: the limit for a3xx actually seems to be 512, but
|
||||
* split between VS and FS. Use lower limit of 256 to
|
||||
* avoid getting into impossible situations:
|
||||
*/
|
||||
return ((screen->gpu_id >= 300) ? 256 : 64) * sizeof(float[4]);
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_MAX_PREDS:
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
|
|||
val.iim_val = reg->iim_val;
|
||||
} else {
|
||||
int8_t components = util_last_bit(reg->wrmask);
|
||||
int8_t max = (reg->num + repeat + components - 1) >> 2;
|
||||
int16_t max = (reg->num + repeat + components - 1) >> 2;
|
||||
|
||||
val.comp = reg->num & 0x3;
|
||||
val.num = reg->num >> 2;
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ struct ir3_info {
|
|||
*/
|
||||
int8_t max_reg; /* highest GPR # used by shader */
|
||||
int8_t max_half_reg;
|
||||
int8_t max_const;
|
||||
int16_t max_const;
|
||||
};
|
||||
|
||||
struct ir3_register {
|
||||
|
|
|
|||
|
|
@ -600,11 +600,6 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
|
|||
struct ir3_register *reg;
|
||||
struct ir3_instruction *orig = NULL;
|
||||
|
||||
/* TODO we need to use a mov to temp for const >= 64.. or maybe
|
||||
* we could use relative addressing..
|
||||
*/
|
||||
compile_assert(ctx, src->Index < 64);
|
||||
|
||||
switch (src->File) {
|
||||
case TGSI_FILE_IMMEDIATE:
|
||||
/* TODO if possible, use actual immediate instead of const.. but
|
||||
|
|
@ -632,6 +627,24 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
|
|||
break;
|
||||
}
|
||||
|
||||
/* We seem to have 8 bits (6.2) for dst register always, so I think
|
||||
* it is safe to assume GPR cannot be >=64
|
||||
*
|
||||
* cat3 instructions only have 8 bits for src2, but cannot take a
|
||||
* const for src2
|
||||
*
|
||||
* cat5 and cat6 in some cases only have 8 bits, but cannot take a
|
||||
* const for any src.
|
||||
*
|
||||
* Other than that we seem to have 12 bits to encode const src,
|
||||
* except for cat1 which may only have 11 bits (but that seems like
|
||||
* a bug)
|
||||
*/
|
||||
if (flags & IR3_REG_CONST)
|
||||
compile_assert(ctx, src->Index < (1 << 9));
|
||||
else
|
||||
compile_assert(ctx, src->Index < (1 << 6));
|
||||
|
||||
if (src->Absolute)
|
||||
flags |= IR3_REG_ABS;
|
||||
if (src->Negate)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue