mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
freedreno/ir3: Simplify the immediates from an array of vec4 to an array of dwords.
We usually had to split the idx/swiz out of the dword index anyway. Note that, incidentally, immediates_size now increments in vec4s instead of 4*vec4s. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5990>
This commit is contained in:
parent
e873c4da08
commit
51acfe2230
7 changed files with 26 additions and 38 deletions
|
|
@ -225,9 +225,9 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t grid[3
|
|||
if (ir3_kernel->info.numwg != INVALID_REG) {
|
||||
assert((ir3_kernel->info.numwg & 0x3) == 0);
|
||||
int idx = ir3_kernel->info.numwg >> 2;
|
||||
const_state->immediates[idx].val[0] = grid[0];
|
||||
const_state->immediates[idx].val[1] = grid[1];
|
||||
const_state->immediates[idx].val[2] = grid[2];
|
||||
const_state->immediates[idx * 4 + 0] = grid[0];
|
||||
const_state->immediates[idx * 4 + 1] = grid[1];
|
||||
const_state->immediates[idx * 4 + 2] = grid[2];
|
||||
}
|
||||
|
||||
/* truncate size to avoid writing constants that shader
|
||||
|
|
@ -240,7 +240,7 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t grid[3
|
|||
size *= 4;
|
||||
|
||||
if (size > 0) {
|
||||
emit_const(ring, base, size, const_state->immediates[0].val);
|
||||
emit_const(ring, base, size, const_state->immediates);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -161,8 +161,6 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
|
|||
if (!ir3_valid_flags(instr, n, new_flags))
|
||||
return false;
|
||||
|
||||
unsigned swiz, idx, i;
|
||||
|
||||
reg = ir3_reg_clone(ctx->shader, reg);
|
||||
|
||||
/* Half constant registers seems to handle only 32-bit values
|
||||
|
|
@ -196,9 +194,12 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
|
|||
new_flags &= ~IR3_REG_FNEG;
|
||||
}
|
||||
|
||||
/* Reallocate for 4 more elements whenever it's necessary */
|
||||
/* Reallocate for 4 more elements whenever it's necessary. Note that ir3
|
||||
* printing relies on having groups of 4 dwords, so we fill the unused
|
||||
* slots with a dummy value.
|
||||
*/
|
||||
struct ir3_const_state *const_state = ir3_const_state(ctx->so);
|
||||
if (const_state->immediates_count == const_state->immediates_size * 4) {
|
||||
if (const_state->immediates_count == const_state->immediates_size) {
|
||||
const_state->immediates = rerzalloc(const_state,
|
||||
const_state->immediates,
|
||||
__typeof__(const_state->immediates[0]),
|
||||
|
|
@ -206,17 +207,14 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
|
|||
const_state->immediates_size + 4);
|
||||
const_state->immediates_size += 4;
|
||||
|
||||
for (int i = const_state->immediates_count; i < const_state->immediates_size * 4; i++)
|
||||
const_state->immediates[i / 4].val[i % 4] = 0xd0d0d0d0;
|
||||
for (int i = const_state->immediates_count; i < const_state->immediates_size; i++)
|
||||
const_state->immediates[i] = 0xd0d0d0d0;
|
||||
}
|
||||
|
||||
int i;
|
||||
for (i = 0; i < const_state->immediates_count; i++) {
|
||||
swiz = i % 4;
|
||||
idx = i / 4;
|
||||
|
||||
if (const_state->immediates[idx].val[swiz] == reg->uim_val) {
|
||||
if (const_state->immediates[i] == reg->uim_val)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == const_state->immediates_count) {
|
||||
|
|
@ -227,10 +225,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
|
|||
ir3_max_const(ctx->so))
|
||||
return false;
|
||||
|
||||
swiz = i % 4;
|
||||
idx = i / 4;
|
||||
|
||||
const_state->immediates[idx].val[swiz] = reg->uim_val;
|
||||
const_state->immediates[i] = reg->uim_val;
|
||||
const_state->immediates_count++;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -156,7 +156,7 @@ static void add_const(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsig
|
|||
struct ir3_const_state *const_state = ir3_const_state(variant);
|
||||
assert((reg & 0x7) == 0);
|
||||
int idx = reg >> (1 + 2); /* low bit is half vs full, next two bits are swiz */
|
||||
if (const_state->immediates_count == const_state->immediates_size * 4) {
|
||||
if (const_state->immediates_count == const_state->immediates_size) {
|
||||
const_state->immediates = rerzalloc(const_state,
|
||||
const_state->immediates,
|
||||
__typeof__(const_state->immediates[0]),
|
||||
|
|
@ -164,10 +164,10 @@ static void add_const(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsig
|
|||
const_state->immediates_size + 4);
|
||||
const_state->immediates_size += 4;
|
||||
}
|
||||
const_state->immediates[idx].val[0] = c0;
|
||||
const_state->immediates[idx].val[1] = c1;
|
||||
const_state->immediates[idx].val[2] = c2;
|
||||
const_state->immediates[idx].val[3] = c3;
|
||||
const_state->immediates[idx * 4 + 0] = c0;
|
||||
const_state->immediates[idx * 4 + 1] = c1;
|
||||
const_state->immediates[idx * 4 + 2] = c2;
|
||||
const_state->immediates[idx * 4 + 3] = c3;
|
||||
const_state->immediates_count++;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -574,10 +574,10 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
|
|||
for (i = 0; i < DIV_ROUND_UP(const_state->immediates_count, 4); i++) {
|
||||
fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i);
|
||||
fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
|
||||
const_state->immediates[i].val[0],
|
||||
const_state->immediates[i].val[1],
|
||||
const_state->immediates[i].val[2],
|
||||
const_state->immediates[i].val[3]);
|
||||
const_state->immediates[i * 4 + 0],
|
||||
const_state->immediates[i * 4 + 1],
|
||||
const_state->immediates[i * 4 + 2],
|
||||
const_state->immediates[i * 4 + 3]);
|
||||
}
|
||||
|
||||
disasm_a3xx(bin, so->info.sizedwords, 0, out, ir->compiler->gpu_id);
|
||||
|
|
|
|||
|
|
@ -193,9 +193,7 @@ struct ir3_const_state {
|
|||
|
||||
unsigned immediates_count;
|
||||
unsigned immediates_size;
|
||||
struct {
|
||||
uint32_t val[4];
|
||||
} *immediates;
|
||||
uint32_t *immediates;
|
||||
|
||||
/* State of ubo access lowered to push consts: */
|
||||
struct ir3_ubo_analysis_state ubo_state;
|
||||
|
|
|
|||
|
|
@ -464,12 +464,7 @@ tu6_emit_xs_config(struct tu_cs *cs,
|
|||
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
|
||||
|
||||
for (unsigned i = 0; i < size; i++) {
|
||||
tu_cs_emit(cs, const_state->immediates[i].val[0]);
|
||||
tu_cs_emit(cs, const_state->immediates[i].val[1]);
|
||||
tu_cs_emit(cs, const_state->immediates[i].val[2]);
|
||||
tu_cs_emit(cs, const_state->immediates[i].val[3]);
|
||||
}
|
||||
tu_cs_emit_array(cs, const_state->immediates, size * 4);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -271,7 +271,7 @@ ir3_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v
|
|||
size *= 4;
|
||||
|
||||
if (size > 0)
|
||||
emit_const(ring, v, base, 0, size, const_state->immediates[0].val, NULL);
|
||||
emit_const(ring, v, base, 0, size, const_state->immediates, NULL);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue