diff --git a/src/panfrost/bifrost/bi_pack.c b/src/panfrost/bifrost/bi_pack.c
index c3372474ecd..aaed48094b7 100644
--- a/src/panfrost/bifrost/bi_pack.c
+++ b/src/panfrost/bifrost/bi_pack.c
@@ -702,7 +702,7 @@ bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,
                           const bi_clause *clause)
 {
         /* No need to collect return addresses when we're in a blend shader. */
-        if (ctx->is_blend)
+        if (ctx->inputs->is_blend)
                 return;
 
         const bi_tuple *tuple = &clause->tuples[clause->tuple_count - 1];
diff --git a/src/panfrost/bifrost/bi_ra.c b/src/panfrost/bifrost/bi_ra.c
index a4f1fe81444..f927b3eeb2c 100644
--- a/src/panfrost/bifrost/bi_ra.c
+++ b/src/panfrost/bifrost/bi_ra.c
@@ -104,7 +104,8 @@ bi_compute_interference(bi_context *ctx, struct lcra_state *l)
 
                 bi_foreach_clause_in_block_rev(blk, clause) {
                         bi_mark_sr_live(blk, clause, node_count, live);
-                        bi_mark_interference(blk, clause, l, live, node_count, ctx->is_blend);
+                        bi_mark_interference(blk, clause, l, live, node_count,
+                                             ctx->inputs->is_blend);
                 }
 
                 free(live);
@@ -130,7 +131,7 @@ bi_allocate_registers(bi_context *ctx, bool *success)
         for (unsigned i = 0; i < 4; i++)
                 l->solutions[node_count + i] = i * 16;
 
-        if (ctx->is_blend) {
+        if (ctx->inputs->is_blend) {
                 /* R0-R3 are reserved for the blend input */
                 l->class_start[BI_REG_CLASS_WORK] = 0;
                 l->class_size[BI_REG_CLASS_WORK] = 16 * 4;
@@ -145,7 +146,8 @@ bi_allocate_registers(bi_context *ctx, bool *success)
                 unsigned dest = bi_get_node(ins->dest[d]);
 
                 /* Blend shaders expect the src colour to be in r0-r3 */
-                if (ins->op == BI_OPCODE_BLEND && !ctx->is_blend) {
+                if (ins->op == BI_OPCODE_BLEND &&
+                    !ctx->inputs->is_blend) {
                         unsigned node = bi_get_node(ins->src[0]);
                         assert(node < node_count);
                         l->solutions[node] = 0;
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 6340cb81e2a..6cbc593c27d 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -85,7 +85,7 @@ bi_emit_jump(bi_builder *b, nir_jump_instr *instr)
 static void
 bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr)
 {
-        assert(b->shader->is_blend);
+        assert(b->shader->inputs->is_blend);
 
         /* We want to load the current pixel.
          * FIXME: The sample to load is currently hardcoded to 0. This should
@@ -95,13 +95,14 @@ bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr)
                 .y = BIFROST_CURRENT_PIXEL,
         };
 
+        uint64_t blend_desc = b->shader->inputs->blend.bifrost_blend_desc;
         uint32_t indices = 0;
         memcpy(&indices, &pix, sizeof(indices));
 
         bi_ld_tile_to(b, bi_dest_index(&instr->dest), bi_imm_u32(indices),
                         bi_register(60), /* coverage bitmap, TODO ra */
                         /* Only keep the conversion part of the blend descriptor. */
-                        bi_imm_u32(b->shader->blend_desc >> 32),
+                        bi_imm_u32(blend_desc >> 32),
                         (instr->num_components - 1));
 }
 
@@ -347,13 +348,15 @@ bi_emit_load_blend_input(bi_builder *b, nir_intrinsic_instr *instr)
 static void
 bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
 {
-        if (b->shader->is_blend) {
+        if (b->shader->inputs->is_blend) {
+                uint64_t blend_desc = b->shader->inputs->blend.bifrost_blend_desc;
+
                 /* Blend descriptor comes from the compile inputs */
                 /* Put the result in r0 */
                 bi_blend_to(b, bi_register(0), rgba,
                                 bi_register(60) /* TODO RA */,
-                                bi_imm_u32(b->shader->blend_desc & 0xffffffff),
-                                bi_imm_u32(b->shader->blend_desc >> 32));
+                                bi_imm_u32(blend_desc & 0xffffffff),
+                                bi_imm_u32(blend_desc >> 32));
         } else {
                 /* Blend descriptor comes from the FAU RAM. By convention, the
                  * return address is stored in r48 and will be used by the
@@ -379,7 +382,7 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
 static bool
 bi_skip_atest(bi_context *ctx, bool emit_zs)
 {
-        return (ctx->is_blit && !emit_zs) || ctx->is_blend;
+        return (ctx->inputs->is_blit && !emit_zs) || ctx->inputs->is_blend;
 }
 
 static void
@@ -483,7 +486,7 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
 
                 bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr), rt);
         }
 
-        if (b->shader->is_blend) {
+        if (b->shader->inputs->is_blend) {
                 /* Jump back to the fragment shader, return address is stored
                  * in r48 (see above). */
@@ -699,7 +702,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
                 break;
         case nir_intrinsic_load_interpolated_input:
         case nir_intrinsic_load_input:
-                if (b->shader->is_blend)
+                if (b->shader->inputs->is_blend)
                         bi_emit_load_blend_input(b, instr);
                 else if (stage == MESA_SHADER_FRAGMENT)
                         bi_emit_load_vary(b, instr);
@@ -816,22 +819,22 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
                 break;
         case nir_intrinsic_load_blend_const_color_r_float:
                 bi_mov_i32_to(b, dst,
-                                bi_imm_f32(b->shader->blend_constants[0]));
+                                bi_imm_f32(b->shader->inputs->blend.constants[0]));
                 break;
 
         case nir_intrinsic_load_blend_const_color_g_float:
                 bi_mov_i32_to(b, dst,
-                                bi_imm_f32(b->shader->blend_constants[1]));
+                                bi_imm_f32(b->shader->inputs->blend.constants[1]));
                 break;
 
         case nir_intrinsic_load_blend_const_color_b_float:
                 bi_mov_i32_to(b, dst,
-                                bi_imm_f32(b->shader->blend_constants[2]));
+                                bi_imm_f32(b->shader->inputs->blend.constants[2]));
                 break;
 
         case nir_intrinsic_load_blend_const_color_a_float:
                 bi_mov_i32_to(b, dst,
-                                bi_imm_f32(b->shader->blend_constants[3]));
+                                bi_imm_f32(b->shader->inputs->blend.constants[3]));
                 break;
 
         case nir_intrinsic_load_sample_positions_pan:
@@ -2326,7 +2329,7 @@ bi_print_stats(bi_context *ctx, unsigned size, FILE *fp)
                         "%u quadwords, %u threads, %u loops, "
                         "%u:%u spills:fills\n",
                         ctx->nir->info.label ?: "",
-                        ctx->is_blend ? "PAN_SHADER_BLEND" :
+                        ctx->inputs->is_blend ? "PAN_SHADER_BLEND" :
                         gl_shader_stage_name(ctx->stage),
                         nr_ins, nr_nops, nr_clauses,
                         size / 16, nr_threads,
@@ -2519,15 +2522,12 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
         bi_context *ctx = rzalloc(NULL, bi_context);
         ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
 
+        ctx->inputs = inputs;
         ctx->nir = nir;
         ctx->stage = nir->info.stage;
         ctx->quirks = bifrost_get_quirks(inputs->gpu_id);
         ctx->arch = inputs->gpu_id >> 12;
-        ctx->is_blend = inputs->is_blend;
-        ctx->is_blit = inputs->is_blit;
-        ctx->blend_desc = inputs->blend.bifrost_blend_desc;
         ctx->push = &program->push;
-        memcpy(ctx->blend_constants, inputs->blend.constants, sizeof(ctx->blend_constants));
         list_inithead(&ctx->blocks);
 
         /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 35e260cddad..37de7ccdb27 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -494,6 +494,7 @@ typedef struct bi_block {
 } bi_block;
 
 typedef struct {
+        const struct panfrost_compile_inputs *inputs;
         nir_shader *nir;
         gl_shader_stage stage;
         struct list_head blocks; /* list of bi_block */
@@ -504,18 +505,9 @@ typedef struct {
         unsigned arch;
         unsigned tls_size;
 
-        /* Is internally a blend/blit shader? Depends on stage == FRAGMENT */
-        bool is_blend, is_blit;
-
-        /* Blend constants */
-        float blend_constants[4];
-
         /* Blend return offsets */
         uint32_t blend_ret_offsets[8];
 
-        /* Blend tile buffer conversion desc */
-        uint64_t blend_desc;
-
         /* During NIR->BIR */
         bi_block *current_block;
         bi_block *after_block;
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index a4e7991af69..fdb8160d203 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -236,15 +236,10 @@ enum midgard_rt_id {
 #define MIDGARD_MAX_SAMPLE_ITER 16
 
 typedef struct compiler_context {
+        const struct panfrost_compile_inputs *inputs;
         nir_shader *nir;
         gl_shader_stage stage;
 
-        /* Is internally a blend shader? Depends on stage == FRAGMENT */
-        bool is_blend;
-
-        /* Render target number for a keyed blend shader. Depends on is_blend */
-        unsigned blend_rt;
-
         /* Number of samples for a keyed blend shader. Depends on is_blend */
         unsigned blend_sample_iterations;
 
@@ -254,9 +249,6 @@ typedef struct compiler_context {
         /* Index to precolour to r2 for a dual-source blend colour */
         unsigned blend_src1;
 
-        /* Blend constants */
-        float blend_constants[4];
-
         /* Number of bytes used for Thread Local Storage */
         unsigned tls_size;
 
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index ab9cfd68c25..16617e0b8fc 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1606,8 +1606,8 @@ mir_get_branch_cond(nir_src *src, bool *invert)
 static uint8_t
 output_load_rt_addr(compiler_context *ctx, nir_intrinsic_instr *instr)
 {
-        if (ctx->is_blend)
-                return ctx->blend_rt;
+        if (ctx->inputs->is_blend)
+                return MIDGARD_COLOR_RT0 + ctx->inputs->blend.rt;
 
         const nir_variable *var;
         var = nir_find_variable_with_driver_location(ctx->nir, nir_var_shader_out, nir_intrinsic_base(instr));
@@ -1722,9 +1722,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 } else if (is_global || is_shared || is_scratch) {
                         unsigned seg = is_global ? LDST_GLOBAL : (is_shared ? LDST_SHARED : LDST_SCRATCH);
                         emit_global(ctx, &instr->instr, true, reg, src_offset, seg);
-                } else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
+                } else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend) {
                         emit_varying_read(ctx, reg, offset, nr_comp, component, indirect_offset, t | nir_dest_bit_size(instr->dest), is_flat);
-                } else if (ctx->is_blend) {
+                } else if (ctx->inputs->is_blend) {
                         /* ctx->blend_input will be precoloured to r0/r2, where
                          * the input is preloaded */
@@ -1810,12 +1810,13 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
         }
 
         case nir_intrinsic_load_blend_const_color_rgba: {
-                assert(ctx->is_blend);
+                assert(ctx->inputs->is_blend);
                 reg = nir_dest_index(&instr->dest);
 
                 midgard_instruction ins = v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), reg);
                 ins.has_constants = true;
-                memcpy(ins.constants.f32, ctx->blend_constants, sizeof(ctx->blend_constants));
+                memcpy(ins.constants.f32, ctx->inputs->blend.constants,
+                       sizeof(ctx->inputs->blend.constants));
                 emit_mir_instruction(ctx, ins);
                 break;
         }
@@ -1941,7 +1942,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 assert (ctx->stage == MESA_SHADER_FRAGMENT);
                 reg = nir_src_index(ctx, &instr->src[0]);
                 for (unsigned s = 0; s < ctx->blend_sample_iterations; s++)
-                        emit_fragment_store(ctx, reg, ~0, ~0, ctx->blend_rt, s);
+                        emit_fragment_store(ctx, reg, ~0, ~0,
+                                            ctx->inputs->blend.rt + MIDGARD_COLOR_RT0,
+                                            s);
                 break;
 
         case nir_intrinsic_store_global:
@@ -2989,10 +2992,9 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
         compiler_context *ctx = rzalloc(NULL, compiler_context);
         ctx->sysval_to_id = panfrost_init_sysvals(&ctx->sysvals, ctx);
 
+        ctx->inputs = inputs;
         ctx->nir = nir;
         ctx->stage = nir->info.stage;
-        ctx->is_blend = inputs->is_blend;
-        ctx->blend_rt = MIDGARD_COLOR_RT0 + inputs->blend.rt;
         ctx->push = &program->push;
 
         if (inputs->is_blend) {
@@ -3004,7 +3006,6 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                 ctx->blend_sample_iterations =
                         DIV_ROUND_UP(desc->block.bits * nr_samples, 128);
         }
-        memcpy(ctx->blend_constants, inputs->blend.constants, sizeof(ctx->blend_constants));
         ctx->blend_input = ~0;
         ctx->blend_src1 = ~0;
         ctx->quirks = midgard_get_quirks(inputs->gpu_id);
@@ -3222,7 +3223,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                         "%u registers, %u threads, %u loops, "
                         "%u:%u spills:fills\n",
                         ctx->nir->info.label ?: "",
-                        ctx->is_blend ? "PAN_SHADER_BLEND" :
+                        ctx->inputs->is_blend ? "PAN_SHADER_BLEND" :
                         gl_shader_stage_name(ctx->stage),
                         nr_ins, nr_bundles, ctx->quadword_count,
                         nr_registers, nr_threads,
diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c
index 5e746d962ab..37cecb1c339 100644
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -321,7 +321,7 @@ mir_compute_interference(
 
         /* We need to force r1.w live throughout a blend shader */
 
-        if (ctx->is_blend) {
+        if (ctx->inputs->is_blend) {
                 unsigned r1w = ~0;
 
                 mir_foreach_block(ctx, _block) {
@@ -394,7 +394,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
         /* The number of vec4 work registers available depends on when the
          * uniforms start and the shader stage. By ABI we limit blend shaders
          * to 8 registers, should be lower XXX */
-        int work_count = ctx->is_blend ? 8 :
+        int work_count = ctx->inputs->is_blend ? 8 :
                          16 - MAX2((ctx->uniform_cutoff - 8), 0);
 
         /* No register allocation to do with no SSA */
@@ -827,7 +827,7 @@ mir_spill_register(
                 unsigned spill_class,
                 unsigned *spill_count)
 {
-        if (spill_class == REG_CLASS_WORK && ctx->is_blend)
+        if (spill_class == REG_CLASS_WORK && ctx->inputs->is_blend)
                 unreachable("Blend shader spilling is currently unimplemented");
 
         unsigned spill_index = ctx->temp_count;
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index 973af220c2e..438325ead10 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -1156,7 +1156,7 @@ mir_schedule_alu(
          * this will be in sadd, we boost this to prevent scheduling csel into
          * smul */
 
-        if (writeout && (branch->constants.u32[0] || ctx->is_blend)) {
+        if (writeout && (branch->constants.u32[0] || ctx->inputs->is_blend)) {
                 sadd = ralloc(ctx, midgard_instruction);
                 *sadd = v_mov(~0, make_compiler_temp(ctx));
                 sadd->unit = UNIT_SADD;
@@ -1183,11 +1183,11 @@ mir_schedule_alu(
          * they are paired with MRT or not so they always need this, at least
          * on MFBD GPUs. */
 
-        if (writeout && (ctx->is_blend || ctx->writeout_branch[1])) {
+        if (writeout && (ctx->inputs->is_blend || ctx->writeout_branch[1])) {
                 vadd = ralloc(ctx, midgard_instruction);
                 *vadd = v_mov(~0, make_compiler_temp(ctx));
 
-                if (!ctx->is_blend) {
+                if (!ctx->inputs->is_blend) {
                         vadd->op = midgard_alu_op_iadd;
                         vadd->src[0] = SSA_FIXED_REGISTER(31);
                         vadd->src_types[0] = nir_type_uint32;