diff --git a/src/broadcom/cle/meson.build b/src/broadcom/cle/meson.build index 8ac32b313e4..a3ed2cff759 100644 --- a/src/broadcom/cle/meson.build +++ b/src/broadcom/cle/meson.build @@ -21,8 +21,6 @@ # [version, cle XML version] v3d_versions = [ [21, 21], - [33, 33], - [41, 33], [42, 33], [71, 33] ] diff --git a/src/broadcom/cle/v3dx_pack.h b/src/broadcom/cle/v3dx_pack.h index e5a1eb26698..0062ddbd516 100644 --- a/src/broadcom/cle/v3dx_pack.h +++ b/src/broadcom/cle/v3dx_pack.h @@ -31,10 +31,6 @@ #if (V3D_VERSION == 21) # include "cle/v3d_packet_v21_pack.h" -#elif (V3D_VERSION == 33) -# include "cle/v3d_packet_v33_pack.h" -#elif (V3D_VERSION == 41) -# include "cle/v3d_packet_v41_pack.h" #elif (V3D_VERSION == 42) # include "cle/v3d_packet_v42_pack.h" #elif (V3D_VERSION == 71) diff --git a/src/broadcom/clif/clif_dump.c b/src/broadcom/clif/clif_dump.c index ede6f42eedf..db94edba113 100644 --- a/src/broadcom/clif/clif_dump.c +++ b/src/broadcom/clif/clif_dump.c @@ -106,12 +106,16 @@ static bool clif_dump_packet(struct clif_dump *clif, uint32_t offset, const uint8_t *cl, uint32_t *size, bool reloc_mode) { - if (clif->devinfo->ver >= 42) + + switch (clif->devinfo->ver) { + case 42: return v3d42_clif_dump_packet(clif, offset, cl, size, reloc_mode); - else if (clif->devinfo->ver >= 41) - return v3d41_clif_dump_packet(clif, offset, cl, size, reloc_mode); - else - return v3d33_clif_dump_packet(clif, offset, cl, size, reloc_mode); + case 71: + return v3d71_clif_dump_packet(clif, offset, cl, size, reloc_mode); + default: + break; + }; + unreachable("Unknown HW version"); } static uint32_t diff --git a/src/broadcom/clif/clif_private.h b/src/broadcom/clif/clif_private.h index cda407a00bf..d4e55e03730 100644 --- a/src/broadcom/clif/clif_private.h +++ b/src/broadcom/clif/clif_private.h @@ -95,10 +95,6 @@ clif_dump_add_address_to_worklist(struct clif_dump *clif, enum reloc_worklist_type type, uint32_t addr); -bool v3d33_clif_dump_packet(struct clif_dump *clif, uint32_t offset, - const uint8_t *cl, uint32_t *size, bool reloc_mode); -bool v3d41_clif_dump_packet(struct clif_dump *clif, uint32_t offset, - const uint8_t *cl, uint32_t *size, bool reloc_mode); bool v3d42_clif_dump_packet(struct clif_dump *clif, uint32_t offset, const uint8_t *cl, uint32_t *size, bool reloc_mode); bool v3d71_clif_dump_packet(struct clif_dump *clif, uint32_t offset, diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c index 7bc2b662cfc..fa85a7d5077 100644 --- a/src/broadcom/common/v3d_device_info.c +++ b/src/broadcom/common/v3d_device_info.c @@ -68,8 +68,6 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i devinfo->has_accumulators = devinfo->ver < 71; switch (devinfo->ver) { - case 33: - case 41: case 42: case 71: break; diff --git a/src/broadcom/common/v3d_macros.h b/src/broadcom/common/v3d_macros.h index b4291fb5350..4ab66f647ab 100644 --- a/src/broadcom/common/v3d_macros.h +++ b/src/broadcom/common/v3d_macros.h @@ -32,12 +32,6 @@ #if (V3D_VERSION == 21) # define V3DX(x) V3D21_##x # define v3dX(x) v3d21_##x -#elif (V3D_VERSION == 33) -# define V3DX(x) V3D33_##x -# define v3dX(x) v3d33_##x -#elif (V3D_VERSION == 41) -# define V3DX(x) V3D41_##x -# define v3dX(x) v3d41_##x #elif (V3D_VERSION == 42) # define V3DX(x) V3D42_##x # define v3dX(x) v3d42_##x diff --git a/src/broadcom/common/v3d_performance_counters.h b/src/broadcom/common/v3d_performance_counters.h index a8f0cff8784..33e3e0e78db 100644 --- a/src/broadcom/common/v3d_performance_counters.h +++ 
b/src/broadcom/common/v3d_performance_counters.h @@ -130,7 +130,7 @@ static const char *v3d_performance_counters[][3] = { {"QPU", "QPU-stalls-other", "[QPU] Stalled qcycles waiting for any other reason (vary/W/Z)"}, }; -#elif (V3D_VERSION >= 41) +#elif (V3D_VERSION >= 42) static const char *v3d_performance_counters[][3] = { {"FEP", "FEP-valid-primitives-no-rendered-pixels", "[FEP] Valid primitives that result in no rendered pixels, for all rendered tiles"}, diff --git a/src/broadcom/compiler/meson.build b/src/broadcom/compiler/meson.build index 453af22aecc..4f696fd5aff 100644 --- a/src/broadcom/compiler/meson.build +++ b/src/broadcom/compiler/meson.build @@ -32,9 +32,7 @@ libbroadcom_compiler_files = files( 'vir_to_qpu.c', 'qpu_schedule.c', 'qpu_validate.c', - 'v3d33_tex.c', - 'v3d40_tex.c', - 'v3d33_vpm_setup.c', + 'v3d_tex.c', 'v3d_compiler.h', 'v3d_nir_lower_io.c', 'v3d_nir_lower_image_load_store.c', diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 220c864a056..ad677e3e0fb 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -38,7 +38,7 @@ #define __gen_address_type uint32_t #define __gen_address_offset(reloc) (*reloc) #define __gen_emit_reloc(cl, reloc) -#include "cle/v3d_packet_v41_pack.h" +#include "cle/v3d_packet_v42_pack.h" #define GENERAL_TMU_LOOKUP_PER_QUAD (0 << 7) #define GENERAL_TMU_LOOKUP_PER_PIXEL (1 << 7) @@ -963,10 +963,7 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) break; } - if (c->devinfo->ver >= 40) - v3d40_vir_emit_tex(c, instr); - else - v3d33_vir_emit_tex(c, instr); + v3d_vir_emit_tex(c, instr); } static struct qreg @@ -1040,15 +1037,10 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, struct qinst *ldvary = NULL; struct qreg vary; - if (c->devinfo->ver >= 41) { - ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef, - c->undef, c->undef); - ldvary->qpu.sig.ldvary = true; - vary = vir_emit_def(c, ldvary); - } else { - vir_NOP(c)->qpu.sig.ldvary = true; - vary = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3); - } + ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef, + c->undef, c->undef); + ldvary->qpu.sig.ldvary = true; + vary = vir_emit_def(c, ldvary); /* Store the input value before interpolation so we can implement * GLSL's interpolateAt functions if the shader uses them. @@ -1904,12 +1896,8 @@ emit_frag_end(struct v3d_compile *c) inst = vir_MOV_dest(c, tlbu_reg, c->outputs[c->output_position_index]); - if (c->devinfo->ver >= 42) { - tlb_specifier |= (TLB_V42_DEPTH_TYPE_PER_PIXEL | - TLB_SAMPLE_MODE_PER_PIXEL); - } else { - tlb_specifier |= TLB_DEPTH_TYPE_PER_PIXEL; - } + tlb_specifier |= (TLB_V42_DEPTH_TYPE_PER_PIXEL | + TLB_SAMPLE_MODE_PER_PIXEL); } else { /* Shader doesn't write to gl_FragDepth, take Z from * FEP. @@ -1917,16 +1905,11 @@ emit_frag_end(struct v3d_compile *c) c->writes_z_from_fep = true; inst = vir_MOV_dest(c, tlbu_reg, vir_nop_reg()); - if (c->devinfo->ver >= 42) { - /* The spec says the PER_PIXEL flag is ignored - * for invariant writes, but the simulator - * demands it. - */ - tlb_specifier |= (TLB_V42_DEPTH_TYPE_INVARIANT | - TLB_SAMPLE_MODE_PER_PIXEL); - } else { - tlb_specifier |= TLB_DEPTH_TYPE_INVARIANT; - } + /* The spec says the PER_PIXEL flag is ignored for + * invariant writes, but the simulator demands it. 
+ */ + tlb_specifier |= (TLB_V42_DEPTH_TYPE_INVARIANT | + TLB_SAMPLE_MODE_PER_PIXEL); /* Since (single-threaded) fragment shaders always need * a TLB write, if we dond't have any we emit a @@ -1956,7 +1939,6 @@ vir_VPM_WRITE_indirect(struct v3d_compile *c, struct qreg vpm_index, bool uniform_vpm_index) { - assert(c->devinfo->ver >= 40); if (uniform_vpm_index) vir_STVPMV(c, vpm_index, val); else @@ -1966,13 +1948,8 @@ vir_VPM_WRITE_indirect(struct v3d_compile *c, static void vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t vpm_index) { - if (c->devinfo->ver >= 40) { - vir_VPM_WRITE_indirect(c, val, - vir_uniform_ui(c, vpm_index), true); - } else { - /* XXX: v3d33_vir_vpm_write_setup(c); */ - vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val); - } + vir_VPM_WRITE_indirect(c, val, + vir_uniform_ui(c, vpm_index), true); } static void @@ -1980,7 +1957,7 @@ emit_vert_end(struct v3d_compile *c) { /* GFXH-1684: VPM writes need to be complete by the end of the shader. */ - if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42) + if (c->devinfo->ver == 42) vir_VPMWT(c); } @@ -1989,7 +1966,7 @@ emit_geom_end(struct v3d_compile *c) { /* GFXH-1684: VPM writes need to be complete by the end of the shader. */ - if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42) + if (c->devinfo->ver == 42) vir_VPMWT(c); } @@ -2174,26 +2151,9 @@ ntq_emit_vpm_read(struct v3d_compile *c, uint32_t *remaining, uint32_t vpm_index) { - if (c->devinfo->ver >= 40 ) { - return vir_LDVPMV_IN(c, - vir_uniform_ui(c, - (*num_components_queued)++)); - } - - struct qreg vpm = vir_reg(QFILE_VPM, vpm_index); - if (*num_components_queued != 0) { - (*num_components_queued)--; - return vir_MOV(c, vpm); - } - - uint32_t num_components = MIN2(*remaining, 32); - - v3d33_vir_vpm_read_setup(c, num_components); - - *num_components_queued = num_components - 1; - *remaining -= num_components; - - return vir_MOV(c, vpm); + return vir_LDVPMV_IN(c, + vir_uniform_ui(c, + (*num_components_queued)++)); } static void @@ -2263,31 +2223,8 @@ ntq_setup_vs_inputs(struct v3d_compile *c) } /* The actual loads will happen directly in nir_intrinsic_load_input - * on newer versions. */ - if (c->devinfo->ver >= 40) - return; - - for (int loc = 0; loc < ARRAY_SIZE(c->vattr_sizes); loc++) { - resize_qreg_array(c, &c->inputs, &c->inputs_array_size, - (loc + 1) * 4); - - for (int i = 0; i < c->vattr_sizes[loc]; i++) { - c->inputs[loc * 4 + i] = - ntq_emit_vpm_read(c, - &vpm_components_queued, - &num_components, - loc * 4 + i); - - } - } - - if (c->devinfo->ver >= 40) { - assert(vpm_components_queued == num_components); - } else { - assert(vpm_components_queued == 0); - assert(num_components == 0); - } + return; } static bool @@ -2533,10 +2470,8 @@ vir_emit_tlb_color_read(struct v3d_compile *c, nir_intrinsic_instr *instr) * switch instead -- see vir_emit_thrsw(). */ if (!c->emitted_tlb_load) { - if (!c->last_thrsw_at_top_level) { - assert(c->devinfo->ver >= 41); + if (!c->last_thrsw_at_top_level) vir_emit_thrsw(c); - } c->emitted_tlb_load = true; } @@ -2744,7 +2679,7 @@ ntq_emit_load_input(struct v3d_compile *c, nir_intrinsic_instr *instr) unsigned offset = nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[0]); - if (c->s->info.stage != MESA_SHADER_FRAGMENT && c->devinfo->ver >= 40) { + if (c->s->info.stage != MESA_SHADER_FRAGMENT) { /* Emit the LDVPM directly now, rather than at the top * of the shader like we did for V3D 3.x (which needs * vpmsetup when not just taking the next offset). 
@@ -3328,11 +3263,11 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_image_store: case nir_intrinsic_image_atomic: case nir_intrinsic_image_atomic_swap: - v3d40_vir_emit_image_load_store(c, instr); + v3d_vir_emit_image_load_store(c, instr); break; case nir_intrinsic_image_load: - v3d40_vir_emit_image_load_store(c, instr); + v3d_vir_emit_image_load_store(c, instr); /* Not really a general TMU load, but we only use this flag * for NIR scheduling and we do schedule these under the same * policy as general TMU. @@ -3502,21 +3437,8 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) * (actually supergroup) to block until the last * invocation reaches the TSY op. */ - if (c->devinfo->ver >= 42) { - vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC, - V3D_QPU_WADDR_SYNCB)); - } else { - struct qinst *sync = - vir_BARRIERID_dest(c, - vir_reg(QFILE_MAGIC, - V3D_QPU_WADDR_SYNCU)); - sync->uniform = - vir_get_uniform_index(c, QUNIFORM_CONSTANT, - 0xffffff00 | - V3D_TSY_WAIT_INC_CHECK); - - } - + vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC, + V3D_QPU_WADDR_SYNCB)); /* The blocking of a TSY op only happens at the next * thread switch. No texturing may be outstanding at the * time of a TSY blocking operation. @@ -4330,14 +4252,12 @@ nir_to_vir(struct v3d_compile *c) emit_fragment_varying(c, NULL, -1, 0, 0); } - if (c->fs_key->is_points && - (c->devinfo->ver < 40 || program_reads_point_coord(c))) { + if (c->fs_key->is_points && program_reads_point_coord(c)) { c->point_x = emit_fragment_varying(c, NULL, -1, 0, 0); c->point_y = emit_fragment_varying(c, NULL, -1, 0, 0); c->uses_implicit_point_line_varyings = true; } else if (c->fs_key->is_lines && - (c->devinfo->ver < 40 || - BITSET_TEST(c->s->info.system_values_read, + (BITSET_TEST(c->s->info.system_values_read, SYSTEM_VALUE_LINE_COORD))) { c->line_x = emit_fragment_varying(c, NULL, -1, 0, 0); c->uses_implicit_point_line_varyings = true; @@ -4350,7 +4270,7 @@ nir_to_vir(struct v3d_compile *c) V3D_QPU_WADDR_SYNC)); } - if (c->devinfo->ver <= 42) { + if (c->devinfo->ver == 42) { c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0)); c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2)); } else if (c->devinfo->ver >= 71) { @@ -4461,25 +4381,12 @@ vir_emit_last_thrsw(struct v3d_compile *c, { *restore_last_thrsw = c->last_thrsw; - /* On V3D before 4.1, we need a TMU op to be outstanding when thread - * switching, so disable threads if we didn't do any TMU ops (each of - * which would have emitted a THRSW). - */ - if (!c->last_thrsw_at_top_level && c->devinfo->ver < 41) { - c->threads = 1; - if (c->last_thrsw) - vir_remove_thrsw(c); - *restore_last_thrsw = NULL; - } - /* If we're threaded and the last THRSW was in conditional code, then * we need to emit another one so that we can flag it as the last * thrsw. */ - if (c->last_thrsw && !c->last_thrsw_at_top_level) { - assert(c->devinfo->ver >= 41); + if (c->last_thrsw && !c->last_thrsw_at_top_level) vir_emit_thrsw(c); - } /* If we're threaded, then we need to mark the last THRSW instruction * so we can emit a pair of them at QPU emit time. @@ -4487,10 +4394,8 @@ vir_emit_last_thrsw(struct v3d_compile *c, * For V3D 4.x, we can spawn the non-fragment shaders already in the * post-last-THRSW state, so we can skip this. 
*/ - if (!c->last_thrsw && c->s->info.stage == MESA_SHADER_FRAGMENT) { - assert(c->devinfo->ver >= 41); + if (!c->last_thrsw && c->s->info.stage == MESA_SHADER_FRAGMENT) vir_emit_thrsw(c); - } /* If we have not inserted a last thread switch yet, do it now to ensure * any potential spilling we do happens before this. If we don't spill @@ -4616,7 +4521,7 @@ v3d_nir_to_vir(struct v3d_compile *c) /* Attempt to allocate registers for the temporaries. If we fail, * reduce thread count and try again. */ - int min_threads = (c->devinfo->ver >= 41) ? 2 : 1; + int min_threads = 2; struct qpu_reg *temp_registers; while (true) { temp_registers = v3d_register_allocate(c); diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 86494706386..8c0e65e3fe8 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -202,9 +202,6 @@ tmu_write_is_sequence_terminator(uint32_t waddr) static bool can_reorder_tmu_write(const struct v3d_device_info *devinfo, uint32_t waddr) { - if (devinfo->ver < 40) - return false; - if (tmu_write_is_sequence_terminator(waddr)) return false; @@ -267,8 +264,7 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n, break; case V3D_QPU_WADDR_UNIFA: - if (state->devinfo->ver >= 40) - add_write_dep(state, &state->last_unifa, n); + add_write_dep(state, &state->last_unifa, n); break; case V3D_QPU_WADDR_NOP: @@ -660,7 +656,7 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo, v3d_qpu_writes_r4(devinfo, inst)) return true; - if (devinfo->ver <= 42) + if (devinfo->ver == 42) return false; /* Don't schedule anything that writes rf0 right after ldvary, since @@ -854,13 +850,10 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo, if (util_bitcount(a_peripherals) + util_bitcount(b_peripherals) <= 1) return true; - if (devinfo->ver < 41) - return false; - /* V3D 4.x can't do more than one peripheral access except in a * few cases: */ - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { /* WRTMUC signal with TMU register write (other than tmuc). */ if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG && b_peripherals == V3D_PERIPHERAL_TMU_WRITE) { @@ -984,7 +977,7 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result, result->sig.small_imm_d) <= 1; } - assert(devinfo->ver <= 42); + assert(devinfo->ver == 42); uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr); int naddrs = util_bitcount64(raddrs_used); @@ -1499,7 +1492,7 @@ retry: * as long as it is not the last delay slot. */ if (inst->sig.ldvary) { - if (c->devinfo->ver <= 42 && + if (c->devinfo->ver == 42 && scoreboard->last_thrsw_tick + 2 >= scoreboard->tick - 1) { continue; @@ -1607,7 +1600,7 @@ update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard, { if (v3d_qpu_magic_waddr_is_sfu(waddr)) scoreboard->last_magic_sfu_write_tick = scoreboard->tick; - else if (devinfo->ver >= 40 && waddr == V3D_QPU_WADDR_UNIFA) + else if (waddr == V3D_QPU_WADDR_UNIFA) scoreboard->last_unifa_write_tick = scoreboard->tick; } @@ -1938,7 +1931,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, if (slot > 0 && qinst->uniform != ~0) return false; - if (c->devinfo->ver <= 42 && v3d_qpu_waits_vpm(inst)) + if (c->devinfo->ver == 42 && v3d_qpu_waits_vpm(inst)) return false; if (inst->sig.ldvary) @@ -1946,12 +1939,12 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { /* GFXH-1625: TMUWT not allowed in the final instruction. 
*/ - if (c->devinfo->ver <= 42 && slot == 2 && + if (c->devinfo->ver == 42 && slot == 2 && inst->alu.add.op == V3D_QPU_A_TMUWT) { return false; } - if (c->devinfo->ver <= 42) { + if (c->devinfo->ver == 42) { /* No writing physical registers at the end. */ bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP; bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP; @@ -1977,10 +1970,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, } } - if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF) - return false; - - if (c->devinfo->ver <= 42) { + if (c->devinfo->ver == 42) { /* RF0-2 might be overwritten during the delay slots by * fragment shader setup. */ @@ -2034,7 +2024,7 @@ qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c, return false; if (qinst->qpu.sig.ldvary) { - if (c->devinfo->ver <= 42 && slot > 0) + if (c->devinfo->ver == 42 && slot > 0) return false; if (c->devinfo->ver >= 71 && slot == 2) return false; @@ -2475,7 +2465,7 @@ alu_reads_register(const struct v3d_device_info *devinfo, else num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op); - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { enum v3d_qpu_mux mux_a, mux_b; if (add) { mux_a = inst->alu.add.a.mux; @@ -2639,7 +2629,7 @@ fixup_pipelined_ldvary(struct v3d_compile *c, * and flagging it for a fixup. In V3D 7.x this is limited only to the * second delay slot. */ - assert((devinfo->ver <= 42 && + assert((devinfo->ver == 42 && scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1) || (devinfo->ver >= 71 && scoreboard->last_thrsw_tick + 2 != scoreboard->tick - 1)); @@ -2672,7 +2662,7 @@ fixup_pipelined_ldvary(struct v3d_compile *c, * ldvary write to r5/rf0 happens in the next instruction). */ assert(!v3d_qpu_writes_r5(devinfo, inst)); - assert(devinfo->ver <= 42 || + assert(devinfo->ver == 42 || (!v3d_qpu_writes_rf0_implicitly(devinfo, inst) && !v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0))); diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c index 0466ee5d0b6..6c15153b9cb 100644 --- a/src/broadcom/compiler/qpu_validate.c +++ b/src/broadcom/compiler/qpu_validate.c @@ -243,7 +243,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) } if (inst->sig.ldvary) { - if (devinfo->ver <= 42) + if (devinfo->ver == 42) fail_instr(state, "LDVARY during THRSW delay slots"); if (devinfo->ver >= 71 && state->ip - state->last_thrsw_ip == 2) { @@ -276,7 +276,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) vpm_writes + tlb_writes + tsy_writes + - (devinfo->ver <= 42 ? inst->sig.ldtmu : 0) + + (devinfo->ver == 42 ? 
inst->sig.ldtmu : 0) + inst->sig.ldtlb + inst->sig.ldvpm + inst->sig.ldtlbu > 1) { @@ -316,7 +316,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) inst->type == V3D_QPU_INSTR_TYPE_ALU) { if ((inst->alu.add.op != V3D_QPU_A_NOP && !inst->alu.add.magic_write)) { - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { fail_instr(state, "RF write after THREND"); } else if (devinfo->ver >= 71) { if (state->last_thrsw_ip - state->ip == 0) { @@ -333,7 +333,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) if ((inst->alu.mul.op != V3D_QPU_M_NOP && !inst->alu.mul.magic_write)) { - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { fail_instr(state, "RF write after THREND"); } else if (devinfo->ver >= 71) { if (state->last_thrsw_ip - state->ip == 0) { @@ -351,7 +351,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && !inst->sig_magic) { - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { fail_instr(state, "RF write after THREND"); } else if (devinfo->ver >= 71 && (inst->sig_addr == 2 || diff --git a/src/broadcom/compiler/v3d33_tex.c b/src/broadcom/compiler/v3d33_tex.c deleted file mode 100644 index b4c888aab07..00000000000 --- a/src/broadcom/compiler/v3d33_tex.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright © 2016-2018 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "v3d_compiler.h" - -/* We don't do any address packing. */ -#define __gen_user_data void -#define __gen_address_type uint32_t -#define __gen_address_offset(reloc) (*reloc) -#define __gen_emit_reloc(cl, reloc) -#include "cle/v3d_packet_v33_pack.h" - -void -v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) -{ - /* FIXME: We don't bother implementing pipelining for texture reads - * for any pre 4.x hardware. It should be straight forward to do but - * we are not really testing or even targeting this hardware at - * present. 
- */ - ntq_flush_tmu(c); - - unsigned unit = instr->texture_index; - - struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = { - V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_header, - - .fetch_sample_mode = instr->op == nir_texop_txf, - }; - - struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 p1_unpacked = { - }; - - switch (instr->sampler_dim) { - case GLSL_SAMPLER_DIM_1D: - if (instr->is_array) - p0_unpacked.lookup_type = TEXTURE_1D_ARRAY; - else - p0_unpacked.lookup_type = TEXTURE_1D; - break; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_RECT: - if (instr->is_array) - p0_unpacked.lookup_type = TEXTURE_2D_ARRAY; - else - p0_unpacked.lookup_type = TEXTURE_2D; - break; - case GLSL_SAMPLER_DIM_3D: - p0_unpacked.lookup_type = TEXTURE_3D; - break; - case GLSL_SAMPLER_DIM_CUBE: - p0_unpacked.lookup_type = TEXTURE_CUBE_MAP; - break; - default: - unreachable("Bad sampler type"); - } - - struct qreg coords[5]; - int next_coord = 0; - for (unsigned i = 0; i < instr->num_srcs; i++) { - switch (instr->src[i].src_type) { - case nir_tex_src_coord: - for (int j = 0; j < instr->coord_components; j++) { - coords[next_coord++] = - ntq_get_src(c, instr->src[i].src, j); - } - if (instr->coord_components < 2) - coords[next_coord++] = vir_uniform_f(c, 0.5); - break; - case nir_tex_src_bias: - coords[next_coord++] = - ntq_get_src(c, instr->src[i].src, 0); - - p0_unpacked.bias_supplied = true; - break; - case nir_tex_src_lod: - coords[next_coord++] = - vir_FADD(c, - ntq_get_src(c, instr->src[i].src, 0), - vir_uniform(c, QUNIFORM_TEXTURE_FIRST_LEVEL, - unit)); - - if (instr->op != nir_texop_txf && - instr->op != nir_texop_tg4) { - p0_unpacked.disable_autolod_use_bias_only = true; - } - break; - case nir_tex_src_comparator: - coords[next_coord++] = - ntq_get_src(c, instr->src[i].src, 0); - - p0_unpacked.shadow = true; - break; - - case nir_tex_src_offset: { - p0_unpacked.texel_offset_for_s_coordinate = - nir_src_comp_as_int(instr->src[i].src, 0); - - if (instr->coord_components >= 2) - p0_unpacked.texel_offset_for_t_coordinate = - nir_src_comp_as_int(instr->src[i].src, 1); - - if (instr->coord_components >= 3) - p0_unpacked.texel_offset_for_r_coordinate = - nir_src_comp_as_int(instr->src[i].src, 2); - break; - } - - default: - unreachable("unknown texture source"); - } - } - - /* Limit the number of channels returned to both how many the NIR - * instruction writes and how many the instruction could produce. - */ - p1_unpacked.return_words_of_texture_data = - nir_def_components_read(&instr->def); - - uint32_t p0_packed; - V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL, - (uint8_t *)&p0_packed, - &p0_unpacked); - - uint32_t p1_packed; - V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL, - (uint8_t *)&p1_packed, - &p1_unpacked); - /* Load unit number into the address field, which will be be used by - * the driver to decide which texture to put in the actual address - * field. - */ - p1_packed |= unit << 5; - - /* There is no native support for GL texture rectangle coordinates, so - * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, - * 1]). 
- */ - if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) { - coords[0] = vir_FMUL(c, coords[0], - vir_uniform(c, QUNIFORM_TEXRECT_SCALE_X, - unit)); - coords[1] = vir_FMUL(c, coords[1], - vir_uniform(c, QUNIFORM_TEXRECT_SCALE_Y, - unit)); - } - - int texture_u[] = { - vir_get_uniform_index(c, QUNIFORM_TEXTURE_CONFIG_P0_0 + unit, p0_packed), - vir_get_uniform_index(c, QUNIFORM_TEXTURE_CONFIG_P1, p1_packed), - }; - - for (int i = 0; i < next_coord; i++) { - struct qreg dst; - - if (i == next_coord - 1) - dst = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUL); - else - dst = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMU); - - struct qinst *tmu = vir_MOV_dest(c, dst, coords[i]); - - if (i < 2) - tmu->uniform = texture_u[i]; - } - - vir_emit_thrsw(c); - - for (int i = 0; i < 4; i++) { - if (p1_unpacked.return_words_of_texture_data & (1 << i)) - ntq_store_def(c, &instr->def, i, vir_LDTMU(c)); - } -} diff --git a/src/broadcom/compiler/v3d33_vpm_setup.c b/src/broadcom/compiler/v3d33_vpm_setup.c deleted file mode 100644 index 8bce67dfae9..00000000000 --- a/src/broadcom/compiler/v3d33_vpm_setup.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright © 2016-2018 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "v3d_compiler.h" - -/* We don't do any address packing. */ -#define __gen_user_data void -#define __gen_address_type uint32_t -#define __gen_address_offset(reloc) (*reloc) -#define __gen_emit_reloc(cl, reloc) -#include "broadcom/cle/v3d_packet_v33_pack.h" - -void -v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components) -{ - struct V3D33_VPM_GENERIC_BLOCK_READ_SETUP unpacked = { - V3D33_VPM_GENERIC_BLOCK_READ_SETUP_header, - - .horiz = true, - .laned = false, - /* If the field is 0, that means a read count of 32. 
*/ - .num = num_components & 31, - .segs = true, - .stride = 1, - .size = VPM_SETUP_SIZE_32_BIT, - .addr = c->num_inputs, - }; - - uint32_t packed; - V3D33_VPM_GENERIC_BLOCK_READ_SETUP_pack(NULL, - (uint8_t *)&packed, - &unpacked); - vir_VPMSETUP(c, vir_uniform_ui(c, packed)); -} - -void -v3d33_vir_vpm_write_setup(struct v3d_compile *c) -{ - uint32_t packed; - struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP unpacked = { - V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_header, - - .horiz = true, - .laned = false, - .segs = true, - .stride = 1, - .size = VPM_SETUP_SIZE_32_BIT, - .addr = 0, - }; - - V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_pack(NULL, - (uint8_t *)&packed, - &unpacked); - vir_VPMSETUP(c, vir_uniform_ui(c, packed)); -} diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 78f6c0c0db9..cb9b2ae5757 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -96,14 +96,6 @@ enum qfile { */ QFILE_TEMP, - /** - * VPM reads use this with an index value to say what part of the VPM - * is being read. - * - * Used only for ver < 40. For ver >= 40 we use ldvpm. - */ - QFILE_VPM, - /** * Stores an immediate value in the index field that will be used * directly by qpu_load_imm(). @@ -1150,7 +1142,6 @@ bool vir_is_raw_mov(struct qinst *inst); bool vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst); bool vir_is_add(struct qinst *inst); bool vir_is_mul(struct qinst *inst); -bool vir_writes_r3_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst); bool vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst); struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); uint8_t vir_channels_written(struct qinst *inst); @@ -1187,12 +1178,9 @@ bool v3d_nir_lower_txf_ms(nir_shader *s); bool v3d_nir_lower_image_load_store(nir_shader *s); bool v3d_nir_lower_load_store_bitsize(nir_shader *s); -void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components); -void v3d33_vir_vpm_write_setup(struct v3d_compile *c); -void v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); -void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); -void v3d40_vir_emit_image_load_store(struct v3d_compile *c, - nir_intrinsic_instr *instr); +void v3d_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); +void v3d_vir_emit_image_load_store(struct v3d_compile *c, + nir_intrinsic_instr *instr); void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers); uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c); @@ -1302,28 +1290,18 @@ vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ #define VIR_SFU(name) \ static inline struct qreg \ vir_##name(struct v3d_compile *c, struct qreg a) \ -{ \ - if (c->devinfo->ver >= 41) { \ - return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \ - c->undef, \ - a, c->undef)); \ - } else { \ - vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ - return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ - } \ +{ \ + return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \ + c->undef, \ + a, c->undef)); \ } \ static inline struct qinst * \ vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ struct qreg a) \ { \ - if (c->devinfo->ver >= 41) { \ - return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \ - dest, \ - a, c->undef)); \ - } else { \ - vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ - return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, 
V3D_QPU_WADDR_R4)); \ - } \ + return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \ + dest, \ + a, c->undef)); \ } #define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name) @@ -1454,16 +1432,11 @@ vir_NOP(struct v3d_compile *c) static inline struct qreg vir_LDTMU(struct v3d_compile *c) { - if (c->devinfo->ver >= 41) { - struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef, - c->undef, c->undef); - ldtmu->qpu.sig.ldtmu = true; + struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef, + c->undef, c->undef); + ldtmu->qpu.sig.ldtmu = true; - return vir_emit_def(c, ldtmu); - } else { - vir_NOP(c)->qpu.sig.ldtmu = true; - return vir_MOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); - } + return vir_emit_def(c, ldtmu); } static inline struct qreg @@ -1476,7 +1449,6 @@ vir_UMUL(struct v3d_compile *c, struct qreg src0, struct qreg src1) static inline struct qreg vir_TLBU_COLOR_READ(struct v3d_compile *c, uint32_t config) { - assert(c->devinfo->ver >= 41); /* XXX */ assert((config & 0xffffff00) == 0xffffff00); struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef, @@ -1489,8 +1461,6 @@ vir_TLBU_COLOR_READ(struct v3d_compile *c, uint32_t config) static inline struct qreg vir_TLB_COLOR_READ(struct v3d_compile *c) { - assert(c->devinfo->ver >= 41); /* XXX */ - struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef, c->undef, c->undef); ldtlb->qpu.sig.ldtlb = true; diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c index f70195e5ec6..55e2e4f2e11 100644 --- a/src/broadcom/compiler/v3d_nir_lower_io.c +++ b/src/broadcom/compiler/v3d_nir_lower_io.c @@ -515,7 +515,7 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b, * The correct fix for this as recommended by Broadcom * is to convert to .8 fixed-point with ffloor(). 
*/ - if (c->devinfo->ver <= 42) + if (c->devinfo->ver == 42) pos = nir_f2i32(b, nir_ffloor(b, pos)); else pos = nir_f2i32(b, nir_fround_even(b, pos)); diff --git a/src/broadcom/compiler/v3d40_tex.c b/src/broadcom/compiler/v3d_tex.c similarity index 94% rename from src/broadcom/compiler/v3d40_tex.c rename to src/broadcom/compiler/v3d_tex.c index 9ae993859c5..7e0bc1aa0e5 100644 --- a/src/broadcom/compiler/v3d40_tex.c +++ b/src/broadcom/compiler/v3d_tex.c @@ -28,7 +28,7 @@ #define __gen_address_type uint32_t #define __gen_address_offset(reloc) (*reloc) #define __gen_emit_reloc(cl, reloc) -#include "cle/v3d_packet_v41_pack.h" +#include "cle/v3d_packet_v42_pack.h" static inline struct qinst * vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val) @@ -61,11 +61,11 @@ vir_WRTMUC(struct v3d_compile *c, enum quniform_contents contents, uint32_t data inst->uniform = vir_get_uniform_index(c, contents, data); } -static const struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked_default = { +static const struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked_default = { .per_pixel_mask_enable = true, }; -static const struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = { +static const struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = { .op = V3D_TMU_OP_REGULAR, }; @@ -86,7 +86,7 @@ handle_tex_src(struct v3d_compile *c, nir_tex_instr *instr, unsigned src_idx, unsigned non_array_components, - struct V3D41_TMU_CONFIG_PARAMETER_2 *p2_unpacked, + struct V3D42_TMU_CONFIG_PARAMETER_2 *p2_unpacked, struct qreg *s_out, unsigned *tmu_writes) { @@ -201,7 +201,7 @@ handle_tex_src(struct v3d_compile *c, static void vir_tex_handle_srcs(struct v3d_compile *c, nir_tex_instr *instr, - struct V3D41_TMU_CONFIG_PARAMETER_2 *p2_unpacked, + struct V3D42_TMU_CONFIG_PARAMETER_2 *p2_unpacked, struct qreg *s, unsigned *tmu_writes) { @@ -224,10 +224,8 @@ get_required_tex_tmu_writes(struct v3d_compile *c, nir_tex_instr *instr) } void -v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) +v3d_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) { - assert(instr->op != nir_texop_lod || c->devinfo->ver >= 42); - unsigned texture_idx = instr->texture_index; /* For instructions that don't have a sampler (i.e. 
txf) we bind @@ -244,7 +242,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) bool output_type_32_bit = c->key->sampler[sampler_idx].return_size == 32; - struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = { + struct V3D42_TMU_CONFIG_PARAMETER_0 p0_unpacked = { }; /* Limit the number of channels returned to both how many the NIR @@ -275,7 +273,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) } assert(p0_unpacked.return_words_of_texture_data != 0); - struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = { + struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked = { .op = V3D_TMU_OP_REGULAR, .gather_mode = instr->op == nir_texop_tg4, .gather_component = instr->component, @@ -304,12 +302,12 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) vir_tex_handle_srcs(c, instr, &p2_unpacked, &s, NULL); uint32_t p0_packed; - V3D41_TMU_CONFIG_PARAMETER_0_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_0_pack(NULL, (uint8_t *)&p0_packed, &p0_unpacked); uint32_t p2_packed; - V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_2_pack(NULL, (uint8_t *)&p2_packed, &p2_unpacked); @@ -339,7 +337,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) output_type_32_bit; if (non_default_p1_config) { - struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = { + struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked = { .output_type_32_bit = output_type_32_bit, .unnormalized_coordinates = (instr->sampler_dim == @@ -356,7 +354,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) p0_unpacked.return_words_of_texture_data < (1 << 2)); uint32_t p1_packed; - V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL, (uint8_t *)&p1_packed, &p1_unpacked); @@ -384,7 +382,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) * address */ uint32_t p1_packed_default; - V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL, (uint8_t *)&p1_packed_default, &p1_unpacked_default); vir_WRTMUC(c, QUNIFORM_CONSTANT, p1_packed_default); @@ -412,7 +410,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) } static uint32_t -v3d40_image_atomic_tmu_op(nir_intrinsic_instr *instr) +v3d_image_atomic_tmu_op(nir_intrinsic_instr *instr) { nir_atomic_op atomic_op = nir_intrinsic_atomic_op(instr); switch (atomic_op) { @@ -431,7 +429,7 @@ v3d40_image_atomic_tmu_op(nir_intrinsic_instr *instr) } static uint32_t -v3d40_image_load_store_tmu_op(nir_intrinsic_instr *instr) +v3d_image_load_store_tmu_op(nir_intrinsic_instr *instr) { switch (instr->intrinsic) { case nir_intrinsic_image_load: @@ -440,7 +438,7 @@ v3d40_image_load_store_tmu_op(nir_intrinsic_instr *instr) case nir_intrinsic_image_atomic: case nir_intrinsic_image_atomic_swap: - return v3d40_image_atomic_tmu_op(instr); + return v3d_image_atomic_tmu_op(instr); default: unreachable("unknown image intrinsic"); @@ -552,21 +550,21 @@ get_required_image_tmu_writes(struct v3d_compile *c, } void -v3d40_vir_emit_image_load_store(struct v3d_compile *c, - nir_intrinsic_instr *instr) +v3d_vir_emit_image_load_store(struct v3d_compile *c, + nir_intrinsic_instr *instr) { unsigned format = nir_intrinsic_format(instr); unsigned unit = nir_src_as_uint(instr->src[0]); - struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = { + struct V3D42_TMU_CONFIG_PARAMETER_0 p0_unpacked = { }; - struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = { + struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked = { .per_pixel_mask_enable = true, .output_type_32_bit = 
v3d_gl_format_is_return_32(format), }; - struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = { 0 }; + struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked = { 0 }; /* Limit the number of channels returned to both how many the NIR * instruction writes and how many the instruction could produce. @@ -578,7 +576,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c, p0_unpacked.return_words_of_texture_data = (1 << instr_return_channels) - 1; - p2_unpacked.op = v3d40_image_load_store_tmu_op(instr); + p2_unpacked.op = v3d_image_load_store_tmu_op(instr); /* If we were able to replace atomic_add for an inc/dec, then we * need/can to do things slightly different, like not loading the @@ -591,7 +589,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c, p2_unpacked.op == V3D_TMU_OP_WRITE_OR_READ_DEC); uint32_t p0_packed; - V3D41_TMU_CONFIG_PARAMETER_0_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_0_pack(NULL, (uint8_t *)&p0_packed, &p0_unpacked); @@ -602,12 +600,12 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c, p0_packed |= unit << 24; uint32_t p1_packed; - V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL, (uint8_t *)&p1_packed, &p1_unpacked); uint32_t p2_packed; - V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_2_pack(NULL, (uint8_t *)&p2_packed, &p2_unpacked); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 8c536b8fbcc..eb83dde784a 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -155,32 +155,6 @@ vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst) return false; } -bool -vir_writes_r3_implicitly(const struct v3d_device_info *devinfo, - struct qinst *inst) -{ - if (!devinfo->has_accumulators) - return false; - - for (int i = 0; i < vir_get_nsrc(inst); i++) { - switch (inst->src[i].file) { - case QFILE_VPM: - return true; - default: - break; - } - } - - if (devinfo->ver < 41 && (inst->qpu.sig.ldvary || - inst->qpu.sig.ldtlb || - inst->qpu.sig.ldtlbu || - inst->qpu.sig.ldvpm)) { - return true; - } - - return false; -} - bool vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst) @@ -203,9 +177,6 @@ vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, break; } - if (devinfo->ver < 41 && inst->qpu.sig.ldtmu) - return true; - return false; } diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c index ab5d4043039..631eeee52ab 100644 --- a/src/broadcom/compiler/vir_dump.c +++ b/src/broadcom/compiler/vir_dump.c @@ -182,11 +182,6 @@ vir_print_reg(struct v3d_compile *c, const struct qinst *inst, break; } - case QFILE_VPM: - fprintf(stderr, "vpm%d.%d", - reg.index / 4, reg.index % 4); - break; - case QFILE_TEMP: fprintf(stderr, "t%d", reg.index); break; @@ -197,9 +192,6 @@ static void vir_dump_sig_addr(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr) { - if (devinfo->ver < 41) - return; - if (!instr->sig_magic) fprintf(stderr, ".rf%d", instr->sig_addr); else { diff --git a/src/broadcom/compiler/vir_opt_copy_propagate.c b/src/broadcom/compiler/vir_opt_copy_propagate.c index 1260838ca05..611c4693ed3 100644 --- a/src/broadcom/compiler/vir_opt_copy_propagate.c +++ b/src/broadcom/compiler/vir_opt_copy_propagate.c @@ -62,7 +62,7 @@ is_copy_mov(const struct v3d_device_info *devinfo, struct qinst *inst) return false; } - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { switch (inst->src[0].file) { case QFILE_MAGIC: /* No copy propagating from R3/R4/R5 -- the MOVs from diff --git 
a/src/broadcom/compiler/vir_opt_dead_code.c b/src/broadcom/compiler/vir_opt_dead_code.c index 5101e62254a..fd1af944427 100644 --- a/src/broadcom/compiler/vir_opt_dead_code.c +++ b/src/broadcom/compiler/vir_opt_dead_code.c @@ -51,22 +51,11 @@ dce(struct v3d_compile *c, struct qinst *inst) vir_remove_instruction(c, inst); } -static bool -has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst) -{ - for (int i = 0; i < vir_get_nsrc(inst); i++) { - if (inst->src[i].file == QFILE_VPM) - return true; - } - - return false; -} - static bool can_write_to_null(struct v3d_compile *c, struct qinst *inst) { /* The SFU instructions must write to a physical register. */ - if (c->devinfo->ver >= 41 && v3d_qpu_uses_sfu(&inst->qpu)) + if (v3d_qpu_uses_sfu(&inst->qpu)) return false; return true; @@ -241,7 +230,6 @@ vir_opt_dead_code(struct v3d_compile *c) } if (v3d_qpu_writes_flags(&inst->qpu) || - has_nonremovable_reads(c, inst) || (is_ldunifa && !is_first_ldunifa && !is_last_ldunifa)) { /* If we can't remove the instruction, but we * don't need its destination value, just diff --git a/src/broadcom/compiler/vir_opt_small_immediates.c b/src/broadcom/compiler/vir_opt_small_immediates.c index ed5bc011964..56f0bf20706 100644 --- a/src/broadcom/compiler/vir_opt_small_immediates.c +++ b/src/broadcom/compiler/vir_opt_small_immediates.c @@ -82,7 +82,7 @@ vir_opt_small_immediates(struct v3d_compile *c) */ struct v3d_qpu_sig new_sig = inst->qpu.sig; uint32_t sig_packed; - if (c->devinfo->ver <= 42) { + if (c->devinfo->ver == 42) { new_sig.small_imm_b = true; } else { if (vir_is_add(inst)) { diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index 081376c0f08..53e84840899 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -942,7 +942,7 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra, * avoid allocating these to registers used by the last instructions * in the shader. */ - const uint32_t safe_rf_start = v3d_ra->devinfo->ver <= 42 ? 3 : 4; + const uint32_t safe_rf_start = v3d_ra->devinfo->ver == 42 ? 3 : 4; if (v3d_ra->nodes->info[node].is_program_end && v3d_ra->next_phys < safe_rf_start) { v3d_ra->next_phys = safe_rf_start; @@ -1004,7 +1004,7 @@ vir_init_reg_sets(struct v3d_compiler *compiler) /* Allocate up to 3 regfile classes, for the ways the physical * register file can be divided up for fragment shader threading. */ - int max_thread_index = (compiler->devinfo->ver >= 40 ? 2 : 3); + int max_thread_index = 2; uint8_t phys_index = get_phys_index(compiler->devinfo); compiler->regs = ra_alloc_reg_set(compiler, phys_index + PHYS_COUNT, @@ -1070,20 +1070,10 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int32_t ip = inst->ip; assert(ip >= 0); - /* If the instruction writes r3/r4 (and optionally moves its - * result to a temp), nothing else can be stored in r3/r4 across + /* If the instruction writes r4 (and optionally moves its + * result to a temp), nothing else can be stored in r4 across * it. 
*/ - if (vir_writes_r3_implicitly(c->devinfo, inst)) { - for (int i = 0; i < c->num_temps; i++) { - if (c->temp_start[i] < ip && c->temp_end[i] > ip) { - ra_add_node_interference(c->g, - temp_to_node(c, i), - acc_nodes[3]); - } - } - } - if (vir_writes_r4_implicitly(c->devinfo, inst)) { for (int i = 0; i < c->num_temps; i++) { if (c->temp_start[i] < ip && c->temp_end[i] > ip) { @@ -1207,15 +1197,6 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, set_temp_class_bits(c, inst->dst.index, class_bits); - } else { - /* Until V3D 4.x, we could only load a uniform - * to r5, so we'll need to spill if uniform - * loads interfere with each other. - */ - if (c->devinfo->ver < 40) { - set_temp_class_bits(c, inst->dst.index, - CLASS_BITS_R5); - } } } else { /* Make sure we don't allocate the ldvary's @@ -1320,7 +1301,7 @@ v3d_register_allocate(struct v3d_compile *c) * RF0-2. Start at RF4 in 7.x to prevent TLB writes from * using RF2-3. */ - .next_phys = c->devinfo->ver <= 42 ? 3 : 4, + .next_phys = c->devinfo->ver == 42 ? 3 : 4, .nodes = &c->nodes, .devinfo = c->devinfo, }; @@ -1333,10 +1314,8 @@ v3d_register_allocate(struct v3d_compile *c) * are available at both 1x and 2x threading, and 4x has 32. */ c->thread_index = ffs(c->threads) - 1; - if (c->devinfo->ver >= 40) { - if (c->thread_index >= 1) - c->thread_index--; - } + if (c->thread_index >= 1) + c->thread_index--; c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes); ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data); diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c index 4ed184cbbcb..605c3e4c7d5 100644 --- a/src/broadcom/compiler/vir_to_qpu.c +++ b/src/broadcom/compiler/vir_to_qpu.c @@ -108,7 +108,7 @@ v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src) * fields of the instruction. 
*/ static void -v3d33_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) +v3d42_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) { if (src.smimm) { assert(instr->sig.small_imm_b); @@ -158,13 +158,13 @@ set_src(struct v3d_qpu_instr *instr, const struct v3d_device_info *devinfo) { if (devinfo->ver < 71) - return v3d33_set_src(instr, mux, src); + return v3d42_set_src(instr, mux, src); else return v3d71_set_src(instr, raddr, src); } static bool -v3d33_mov_src_and_dst_equal(struct qinst *qinst) +v3d42_mov_src_and_dst_equal(struct qinst *qinst) { enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr; if (qinst->qpu.alu.mul.magic_write) { @@ -216,7 +216,7 @@ mov_src_and_dst_equal(struct qinst *qinst, const struct v3d_device_info *devinfo) { if (devinfo->ver < 71) - return v3d33_mov_src_and_dst_equal(qinst); + return v3d42_mov_src_and_dst_equal(qinst); else return v3d71_mov_src_and_dst_equal(qinst); } @@ -262,8 +262,6 @@ v3d_generate_code_block(struct v3d_compile *c, struct qblock *block, struct qpu_reg *temp_registers) { - int last_vpm_read_index = -1; - vir_for_each_inst_safe(qinst, block) { #if 0 fprintf(stderr, "translating qinst to qpu: "); @@ -271,8 +269,6 @@ v3d_generate_code_block(struct v3d_compile *c, fprintf(stderr, "\n"); #endif - struct qinst *temp; - if (vir_has_uniform(qinst)) c->num_uniforms++; @@ -303,19 +299,6 @@ v3d_generate_code_block(struct v3d_compile *c, case QFILE_SMALL_IMM: src[i].smimm = true; break; - - case QFILE_VPM: - assert(c->devinfo->ver < 40); - assert((int)qinst->src[i].index >= - last_vpm_read_index); - (void)last_vpm_read_index; - last_vpm_read_index = qinst->src[i].index; - - temp = new_qpu_nop_before(qinst); - temp->qpu.sig.ldvpm = true; - - src[i] = qpu_magic(V3D_QPU_WADDR_R3); - break; } } @@ -337,10 +320,6 @@ v3d_generate_code_block(struct v3d_compile *c, dst = temp_registers[qinst->dst.index]; break; - case QFILE_VPM: - dst = qpu_magic(V3D_QPU_WADDR_VPM); - break; - case QFILE_SMALL_IMM: case QFILE_LOAD_IMM: assert(!"not reached"); @@ -361,8 +340,6 @@ v3d_generate_code_block(struct v3d_compile *c, } if (use_rf) { - assert(c->devinfo->ver >= 40); - if (qinst->qpu.sig.ldunif) { qinst->qpu.sig.ldunif = false; qinst->qpu.sig.ldunifrf = true; @@ -470,11 +447,7 @@ v3d_dump_qpu(struct v3d_compile *c) const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]); fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str); - /* We can only do this on 4.x, because we're not tracking TMU - * implicit uniforms here on 3.x. - */ - if (c->devinfo->ver >= 40 && - reads_uniform(c->devinfo, c->qpu_insts[i])) { + if (reads_uniform(c->devinfo, c->qpu_insts[i])) { fprintf(stderr, " ("); vir_dump_uniform(c->uniform_contents[next_uniform], c->uniform_data[next_uniform]); @@ -486,8 +459,7 @@ v3d_dump_qpu(struct v3d_compile *c) } /* Make sure our dumping lined up. 
*/ - if (c->devinfo->ver >= 40) - assert(next_uniform == c->num_uniforms); + assert(next_uniform == c->num_uniforms); fprintf(stderr, "\n"); } diff --git a/src/broadcom/meson.build b/src/broadcom/meson.build index 73cb7aa0575..30eb57e515f 100644 --- a/src/broadcom/meson.build +++ b/src/broadcom/meson.build @@ -22,7 +22,7 @@ inc_broadcom = include_directories('.', 'cle') subdir('cle') -v3d_versions = ['33', '41', '42', '71'] +v3d_versions = ['42', '71'] v3d_libs = [] if with_gallium_v3d or with_broadcom_vk diff --git a/src/broadcom/simulator/v3d_simulator.h b/src/broadcom/simulator/v3d_simulator.h index 92305634468..03575ae8951 100644 --- a/src/broadcom/simulator/v3d_simulator.h +++ b/src/broadcom/simulator/v3d_simulator.h @@ -45,11 +45,7 @@ uint32_t v3d_simulator_get_mem_free(void); #ifdef v3dX # include "v3dx_simulator.h" #else -# define v3dX(x) v3d33_##x -# include "v3dx_simulator.h" -# undef v3dX - -# define v3dX(x) v3d41_##x +# define v3dX(x) v3d42_##x # include "v3dx_simulator.h" # undef v3dX @@ -61,15 +57,10 @@ uint32_t v3d_simulator_get_mem_free(void); /* Helper to call simulator ver specific functions */ #define v3d_X_simulator(thing) ({ \ - __typeof(&v3d33_simulator_##thing) v3d_X_sim_thing;\ + __typeof(&v3d42_simulator_##thing) v3d_X_sim_thing;\ switch (sim_state.ver) { \ - case 33: \ - case 40: \ - v3d_X_sim_thing = &v3d33_simulator_##thing; \ - break; \ - case 41: \ case 42: \ - v3d_X_sim_thing = &v3d41_simulator_##thing; \ + v3d_X_sim_thing = &v3d42_simulator_##thing; \ break; \ case 71: \ v3d_X_sim_thing = &v3d71_simulator_##thing; \ diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c index 904cf2d1b76..0517d4d4658 100644 --- a/src/broadcom/simulator/v3dx_simulator.c +++ b/src/broadcom/simulator/v3dx_simulator.c @@ -51,27 +51,14 @@ #if V3D_VERSION == 71 #include "libs/core/v3d/registers/7.1.6.0/v3d.h" #else -#if V3D_VERSION == 41 || V3D_VERSION == 42 +#if V3D_VERSION == 42 #include "libs/core/v3d/registers/4.2.14.0/v3d.h" -#else -#include "libs/core/v3d/registers/3.3.0.0/v3d.h" #endif #endif #define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val) #define V3D_READ(reg) v3d_hw_read_reg(v3d, reg) -static void -v3d_invalidate_l3(struct v3d_hw *v3d) -{ -#if V3D_VERSION < 40 - uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL); - - V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET); - V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET); -#endif -} - /* Invalidates the L2C cache. This is a read-only cache for uniforms and instructions. 
*/ static void v3d_invalidate_l2c(struct v3d_hw *v3d) @@ -156,7 +143,6 @@ v3d_invalidate_slices(struct v3d_hw *v3d) static void v3d_invalidate_caches(struct v3d_hw *v3d) { - v3d_invalidate_l3(v3d); v3d_invalidate_l2c(v3d); v3d_invalidate_l2t(v3d); v3d_invalidate_slices(v3d); @@ -225,7 +211,7 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d, struct drm_v3d_submit_csd *args, uint32_t gmp_ofs) { -#if V3D_VERSION >= 41 +#if V3D_VERSION >= 42 int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) & V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET); g_gmp_ofs = gmp_ofs; @@ -282,13 +268,13 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, args->value = 1; return 0; case DRM_V3D_PARAM_SUPPORTS_CSD: - args->value = V3D_VERSION >= 41; + args->value = V3D_VERSION >= 42; return 0; case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH: args->value = 1; return 0; case DRM_V3D_PARAM_SUPPORTS_PERFMON: - args->value = V3D_VERSION >= 41; + args->value = V3D_VERSION >= 42; return 0; case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: args->value = 1; @@ -359,8 +345,7 @@ handle_mmu_interruptions(struct v3d_hw *v3d, uint32_t axi_id = V3D_READ(V3D_MMU_VIO_ID); uint32_t va_width = 30; -#if V3D_VERSION >= 41 - static const char *const v3d41_axi_ids[] = { + static const char *const v3d42_axi_ids[] = { "L2T", "PTB", "PSE", @@ -372,14 +357,14 @@ handle_mmu_interruptions(struct v3d_hw *v3d, }; axi_id = axi_id >> 5; - if (axi_id < ARRAY_SIZE(v3d41_axi_ids)) - client = v3d41_axi_ids[axi_id]; + if (axi_id < ARRAY_SIZE(v3d42_axi_ids)) + client = v3d42_axi_ids[axi_id]; uint32_t mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO); va_width += ((mmu_debug & V3D_MMU_DEBUG_INFO_VA_WIDTH_SET) >> V3D_MMU_DEBUG_INFO_VA_WIDTH_LSB); -#endif + /* Only the top bits (final number depends on the gen) of the virtual * address are reported in the MMU VIO_ADDR register. */ @@ -454,18 +439,6 @@ v3d_isr(uint32_t hub_status) void v3dX(simulator_init_regs)(struct v3d_hw *v3d) { -#if V3D_VERSION == 33 - /* Set OVRTMUOUT to match kernel behavior. - * - * This means that the texture sampler uniform configuration's tmu - * output type field is used, instead of using the hardware default - * behavior based on the texture type. If you want the default - * behavior, you can still put "2" in the indirect texture state's - * output_type field. - */ - V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET); -#endif - /* FIXME: the kernel captures some additional core interrupts here, * for tracing. Perhaps we should evaluate to do the same here and add * some debug options. 
@@ -514,13 +487,11 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma); V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms); } -#if V3D_VERSION >= 41 if (submit->qts) { V3D_WRITE(V3D_CLE_0_CT0QTS, V3D_CLE_0_CT0QTS_CTQTSEN_SET | submit->qts); } -#endif V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start); V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end); @@ -544,21 +515,18 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, } } -#if V3D_VERSION >= 41 #define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x)) #define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x)) #define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8) #define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \ V3D_PCTR_0_SRC_N_SHIFT(x) + \ V3D_PCTR_0_SRC_0_3_PCTRS0_MSB)) -#endif void v3dX(simulator_perfmon_start)(struct v3d_hw *v3d, uint32_t ncounters, uint8_t *events) { -#if V3D_VERSION >= 41 int i, j; uint32_t source; uint32_t mask = BITFIELD_RANGE(0, ncounters); @@ -573,21 +541,18 @@ v3dX(simulator_perfmon_start)(struct v3d_hw *v3d, V3D_WRITE(V3D_PCTR_0_CLR, mask); V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask); V3D_WRITE(V3D_PCTR_0_EN, mask); -#endif } void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d, uint32_t ncounters, uint64_t *values) { -#if V3D_VERSION >= 41 int i; for (i = 0; i < ncounters; i++) values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i)); V3D_WRITE(V3D_PCTR_0_EN, 0); -#endif } void v3dX(simulator_get_perfcnt_total)(uint32_t *count) diff --git a/src/broadcom/vulkan/v3dv_cl.c b/src/broadcom/vulkan/v3dv_cl.c index acdd013a996..851e1388a8d 100644 --- a/src/broadcom/vulkan/v3dv_cl.c +++ b/src/broadcom/vulkan/v3dv_cl.c @@ -27,7 +27,7 @@ * versions, so we just explicitly set the V3D_VERSION and include v3dx_pack * here */ -#define V3D_VERSION 33 +#define V3D_VERSION 42 #include "broadcom/common/v3d_macros.h" #include "broadcom/cle/v3dx_pack.h" diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 9c104b3d6d4..8d9914938e2 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -618,10 +618,10 @@ struct v3dv_device_memory { #define V3DV_MAX_PLANE_COUNT 3 struct v3dv_format_plane { - /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ + /* One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ uint8_t rt_type; - /* One of V3D33_TEXTURE_DATA_FORMAT_*. */ + /* One of V3D42_TEXTURE_DATA_FORMAT_*. 
*/ uint8_t tex_type; /* Swizzle to apply to the RGBA shader output for storing to the tile diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build index 289473d2ca1..600840b8764 100644 --- a/src/gallium/drivers/v3d/meson.build +++ b/src/gallium/drivers/v3d/meson.build @@ -59,7 +59,7 @@ if dep_v3dv3.found() v3d_args += '-DUSE_V3D_SIMULATOR' endif -v3d_versions = ['33', '42', '71'] +v3d_versions = ['42', '71'] per_version_libs = [] foreach ver : v3d_versions diff --git a/src/gallium/drivers/v3d/v3d_blit.c b/src/gallium/drivers/v3d/v3d_blit.c index f62d3a4f40f..32030a7b4e1 100644 --- a/src/gallium/drivers/v3d/v3d_blit.c +++ b/src/gallium/drivers/v3d/v3d_blit.c @@ -309,7 +309,7 @@ v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info) struct v3d_screen *screen = v3d->screen; struct v3d_device_info *devinfo = &screen->devinfo; - if (devinfo->ver < 40 || !info->mask) + if (!info->mask) return; bool is_color_blit = info->mask & PIPE_MASK_RGBA; diff --git a/src/gallium/drivers/v3d/v3d_cl.c b/src/gallium/drivers/v3d/v3d_cl.c index c03927e0453..d8ee4ffc206 100644 --- a/src/gallium/drivers/v3d/v3d_cl.c +++ b/src/gallium/drivers/v3d/v3d_cl.c @@ -28,7 +28,7 @@ * hw versions, so we just explicitly set the V3D_VERSION and include * v3dx_pack here */ -#define V3D_VERSION 33 +#define V3D_VERSION 42 #include "broadcom/common/v3d_macros.h" #include "broadcom/cle/v3dx_pack.h" diff --git a/src/gallium/drivers/v3d/v3d_context.c b/src/gallium/drivers/v3d/v3d_context.c index 1dc4bd017fe..240c99672f9 100644 --- a/src/gallium/drivers/v3d/v3d_context.c +++ b/src/gallium/drivers/v3d/v3d_context.c @@ -300,16 +300,11 @@ v3d_get_sample_position(struct pipe_context *pctx, unsigned sample_count, unsigned sample_index, float *xy) { - struct v3d_context *v3d = v3d_context(pctx); - if (sample_count <= 1) { xy[0] = 0.5; xy[1] = 0.5; } else { - static const int xoffsets_v33[] = { 1, -3, 3, -1 }; - static const int xoffsets_v42[] = { -1, 3, -3, 1 }; - const int *xoffsets = (v3d->screen->devinfo.ver >= 42 ? - xoffsets_v42 : xoffsets_v33); + static const int xoffsets[] = { -1, 3, -3, 1 }; xy[0] = 0.5 + xoffsets[sample_index] * .125; xy[1] = .125 + sample_index * .25; diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h index 948abe686d7..2f27693fef6 100644 --- a/src/gallium/drivers/v3d/v3d_context.h +++ b/src/gallium/drivers/v3d/v3d_context.h @@ -825,12 +825,8 @@ void v3d_disk_cache_store(struct v3d_context *v3d, /* Helper to call hw ver specific functions */ #define v3d_X(devinfo, thing) ({ \ - __typeof(&v3d33_##thing) v3d_X_thing; \ + __typeof(&v3d42_##thing) v3d_X_thing; \ switch (devinfo->ver) { \ - case 33: \ - case 40: \ - v3d_X_thing = &v3d33_##thing; \ - break; \ case 42: \ v3d_X_thing = &v3d42_##thing; \ break; \ @@ -846,19 +842,13 @@ void v3d_disk_cache_store(struct v3d_context *v3d, /* FIXME: The same for vulkan/opengl. Common place? define it at the * v3d_packet files? 
*/ -#define V3D33_CLIPPER_XY_GRANULARITY 256.0f #define V3D42_CLIPPER_XY_GRANULARITY 256.0f #define V3D71_CLIPPER_XY_GRANULARITY 64.0f /* Helper to get hw-specific macro values */ #define V3DV_X(devinfo, thing) ({ \ - __typeof(V3D33_##thing) V3D_X_THING; \ + __typeof(V3D42_##thing) V3D_X_THING; \ switch (devinfo->ver) { \ - case 33: \ - case 40: \ - V3D_X_THING = V3D33_##thing; \ - break; \ - case 41: \ case 42: \ V3D_X_THING = V3D42_##thing; \ break; \ @@ -874,10 +864,6 @@ void v3d_disk_cache_store(struct v3d_context *v3d, #ifdef v3dX # include "v3dx_context.h" #else -# define v3dX(x) v3d33_##x -# include "v3dx_context.h" -# undef v3dX - # define v3dX(x) v3d42_##x # include "v3dx_context.h" # undef v3dX diff --git a/src/gallium/drivers/v3d/v3d_format_table.h b/src/gallium/drivers/v3d/v3d_format_table.h index b291708c3ed..45cddeb669d 100644 --- a/src/gallium/drivers/v3d/v3d_format_table.h +++ b/src/gallium/drivers/v3d/v3d_format_table.h @@ -30,10 +30,10 @@ struct v3d_format { /** Set if the pipe format is defined in the table. */ bool present; - /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ + /** One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ uint8_t rt_type; - /** One of V3D33_TEXTURE_DATA_FORMAT_*. */ + /** One of V3D42_TEXTURE_DATA_FORMAT_*. */ uint8_t tex_type; /** diff --git a/src/gallium/drivers/v3d/v3d_formats.c b/src/gallium/drivers/v3d/v3d_formats.c index 559c6681e22..cb01f05e31b 100644 --- a/src/gallium/drivers/v3d/v3d_formats.c +++ b/src/gallium/drivers/v3d/v3d_formats.c @@ -38,7 +38,7 @@ #include "v3d_format_table.h" /* The format internal types are the same across V3D versions */ -#define V3D_VERSION 33 +#define V3D_VERSION 42 #include "broadcom/cle/v3dx_pack.h" bool diff --git a/src/gallium/drivers/v3d/v3d_job.c b/src/gallium/drivers/v3d/v3d_job.c index d837a9b23ac..68b67a5ce10 100644 --- a/src/gallium/drivers/v3d/v3d_job.c +++ b/src/gallium/drivers/v3d/v3d_job.c @@ -29,7 +29,7 @@ #include #include "v3d_context.h" /* The OQ/semaphore packets are the same across V3D versions. */ -#define V3D_VERSION 33 +#define V3D_VERSION 42 #include "broadcom/cle/v3dx_pack.h" #include "broadcom/common/v3d_macros.h" #include "util/hash_table.h" @@ -547,7 +547,7 @@ v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job) /* On V3D 4.1, the tile alloc/state setup moved to register writes * instead of binner packets. */ - if (devinfo->ver >= 41) { + if (devinfo->ver >= 42) { v3d_job_add_bo(job, job->tile_alloc); job->submit.qma = job->tile_alloc->offset; job->submit.qms = job->tile_alloc->size; diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c index 89fee012ddd..236dd15ced1 100644 --- a/src/gallium/drivers/v3d/v3d_program.c +++ b/src/gallium/drivers/v3d/v3d_program.c @@ -35,7 +35,8 @@ #include "nir/tgsi_to_nir.h" #include "compiler/v3d_compiler.h" #include "v3d_context.h" -#include "broadcom/cle/v3d_packet_v33_pack.h" +/* packets here are the same across V3D versions. */ +#include "broadcom/cle/v3d_packet_v42_pack.h" static struct v3d_compiled_shader * v3d_get_compiled_shader(struct v3d_context *v3d, @@ -136,7 +137,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, while (vpm_size) { uint32_t write_size = MIN2(vpm_size, 1 << 4); - struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { + struct V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { /* We need the offset from the coordinate shader's VPM * output block, which has the [X, Y, Z, W, Xs, Ys] * values at the start. 
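For context, the v3d_X()/V3DV_X() dispatch helpers trimmed in v3d_context.h above now only distinguish the 4.2 and 7.1 backends. A minimal usage sketch under that assumption; the wrapper function below is hypothetical and not part of the patch:

#include "v3d_context.h"   /* assumed: provides v3d_X()/V3DV_X() and struct v3d_context */

/* Hypothetical caller: v3d_X() resolves the v3d42_* or v3d71_* symbol at run
 * time, and V3DV_X() does the same for per-version macro values such as
 * CLIPPER_XY_GRANULARITY (256.0f on 4.2, 64.0f on 7.1).
 */
static void
emit_state_for_current_ver(struct v3d_context *v3d, struct pipe_context *pctx)
{
        struct v3d_device_info *devinfo = &v3d->screen->devinfo;

        float granularity = V3DV_X(devinfo, CLIPPER_XY_GRANULARITY);
        (void)granularity;

        /* Expands to v3d42_emit_state(pctx) or v3d71_emit_state(pctx). */
        v3d_X(devinfo, emit_state)(pctx);
}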
@@ -151,7 +152,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, so->num_tf_specs != 0); assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs)); - V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, + V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, (void *)&so->tf_specs[so->num_tf_specs], &unpacked); @@ -166,7 +167,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, assert(unpacked.first_shaded_vertex_value_to_output != 8 || so->num_tf_specs != 0); - V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, + V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, (void *)&so->tf_specs_psiz[so->num_tf_specs], &unpacked); so->num_tf_specs++; @@ -559,7 +560,6 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, assert(key->num_tex_used == key->num_samplers_used); for (int i = 0; i < texstate->num_textures; i++) { struct pipe_sampler_view *sampler = texstate->textures[i]; - struct v3d_sampler_view *v3d_sampler = v3d_sampler_view(sampler); if (!sampler) continue; @@ -573,27 +573,16 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, */ if (key->sampler[i].return_size == 16) { key->sampler[i].return_channels = 2; - } else if (devinfo->ver > 40) { - key->sampler[i].return_channels = 4; } else { - key->sampler[i].return_channels = - v3d_get_tex_return_channels(devinfo, - sampler->format); + key->sampler[i].return_channels = 4; } - if (key->sampler[i].return_size == 32 && devinfo->ver < 40) { - memcpy(key->tex[i].swizzle, - v3d_sampler->swizzle, - sizeof(v3d_sampler->swizzle)); - } else { - /* For 16-bit returns, we let the sampler state handle - * the swizzle. - */ - key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; - key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; - key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; - key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; - } + /* We let the sampler state handle the swizzle. + */ + key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; + key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; + key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; + key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; } } diff --git a/src/gallium/drivers/v3d/v3d_resource.c b/src/gallium/drivers/v3d/v3d_resource.c index d9a79614dd1..4003ed722b5 100644 --- a/src/gallium/drivers/v3d/v3d_resource.c +++ b/src/gallium/drivers/v3d/v3d_resource.c @@ -36,7 +36,8 @@ #include "v3d_screen.h" #include "v3d_context.h" #include "v3d_resource.h" -#include "broadcom/cle/v3d_packet_v33_pack.h" +/* The packets used here the same across V3D versions. 
*/ +#include "broadcom/cle/v3d_packet_v42_pack.h" static void v3d_debug_resource_layout(struct v3d_resource *rsc, const char *caller) @@ -747,8 +748,6 @@ static struct v3d_resource * v3d_resource_setup(struct pipe_screen *pscreen, const struct pipe_resource *tmpl) { - struct v3d_screen *screen = v3d_screen(pscreen); - struct v3d_device_info *devinfo = &screen->devinfo; struct v3d_resource *rsc = CALLOC_STRUCT(v3d_resource); if (!rsc) @@ -760,34 +759,7 @@ v3d_resource_setup(struct pipe_screen *pscreen, pipe_reference_init(&prsc->reference, 1); prsc->screen = pscreen; - if (prsc->nr_samples <= 1 || - devinfo->ver >= 40 || - util_format_is_depth_or_stencil(prsc->format)) { - rsc->cpp = util_format_get_blocksize(prsc->format); - if (devinfo->ver < 40 && prsc->nr_samples > 1) - rsc->cpp *= prsc->nr_samples; - } else { - assert(v3d_rt_format_supported(devinfo, prsc->format)); - uint32_t output_image_format = - v3d_get_rt_format(devinfo, prsc->format); - uint32_t internal_type; - uint32_t internal_bpp; - v3d_X(devinfo, get_internal_type_bpp_for_output_format) - (output_image_format, &internal_type, &internal_bpp); - - switch (internal_bpp) { - case V3D_INTERNAL_BPP_32: - rsc->cpp = 4; - break; - case V3D_INTERNAL_BPP_64: - rsc->cpp = 8; - break; - case V3D_INTERNAL_BPP_128: - rsc->cpp = 16; - break; - } - } - + rsc->cpp = util_format_get_blocksize(prsc->format); rsc->serial_id++; assert(rsc->cpp); diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 08d02c9a73b..44d5b90c44d 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -153,7 +153,7 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_POLYGON_OFFSET_CLAMP: - return screen->devinfo.ver >= 41; + return screen->devinfo.ver >= 42; case PIPE_CAP_TEXTURE_QUERY_LOD: @@ -182,20 +182,18 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return PIPE_TEXTURE_TRANSFER_BLIT; case PIPE_CAP_COMPUTE: - return screen->has_csd && screen->devinfo.ver >= 41; + return screen->has_csd && screen->devinfo.ver >= 42; case PIPE_CAP_GENERATE_MIPMAP: return v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_TFU); case PIPE_CAP_INDEP_BLEND_ENABLE: - return screen->devinfo.ver >= 40; + return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return V3D_NON_COHERENT_ATOM_SIZE; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: - if (screen->devinfo.ver < 40) - return 0; return 4; case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: @@ -218,15 +216,9 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT: return 0; case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER: - if (screen->devinfo.ver >= 40) - return 0; - else - return 1; + return 0; case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - if (screen->devinfo.ver >= 40) - return 1; - else - return 0; + return 1; case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: @@ -240,18 +232,13 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) /* Texturing. 
*/ case PIPE_CAP_MAX_TEXTURE_2D_SIZE: - if (screen->devinfo.ver < 40) - return 2048; - else if (screen->nonmsaa_texture_size_limit) + if (screen->nonmsaa_texture_size_limit) return 7680; else return V3D_MAX_IMAGE_DIMENSION; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - if (screen->devinfo.ver < 40) - return 12; - else - return V3D_MAX_MIP_LEVELS; + return V3D_MAX_MIP_LEVELS; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: return V3D_MAX_ARRAY_LAYERS; @@ -361,7 +348,7 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type s return 0; break; case PIPE_SHADER_GEOMETRY: - if (screen->devinfo.ver < 41) + if (screen->devinfo.ver < 42) return 0; break; default: @@ -454,7 +441,7 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type s case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: if (screen->has_cache_flush) { - if (screen->devinfo.ver < 41) + if (screen->devinfo.ver < 42) return 0; else return PIPE_MAX_SHADER_IMAGES; diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c index 64c217d4f6c..c3b52dd39e3 100644 --- a/src/gallium/drivers/v3d/v3d_uniforms.c +++ b/src/gallium/drivers/v3d/v3d_uniforms.c @@ -28,9 +28,9 @@ #include "compiler/v3d_compiler.h" /* We don't expect that the packets we use in this file change across across - * hw versions, so we just include directly the v33 header + * hw versions, so we just include directly the v42 header */ -#include "broadcom/cle/v3d_packet_v33_pack.h" +#include "broadcom/cle/v3d_packet_v42_pack.h" static uint32_t get_texrect_scale(struct v3d_texture_stateobj *texstate, @@ -124,54 +124,6 @@ get_image_size(struct v3d_shaderimg_stateobj *shaderimg, } } -/** - * Writes the V3D 3.x P0 (CFG_MODE=1) texture parameter. - * - * Some bits of this field are dependent on the type of sample being done by - * the shader, while other bits are dependent on the sampler state. We OR the - * two together here. - */ -static void -write_texture_p0(struct v3d_job *job, - struct v3d_cl_out **uniforms, - struct v3d_texture_stateobj *texstate, - uint32_t unit, - uint32_t shader_data) -{ - struct pipe_sampler_state *psampler = texstate->samplers[unit]; - struct v3d_sampler_state *sampler = v3d_sampler_state(psampler); - - cl_aligned_u32(uniforms, shader_data | sampler->p0); -} - -/** Writes the V3D 3.x P1 (CFG_MODE=1) texture parameter. */ -static void -write_texture_p1(struct v3d_job *job, - struct v3d_cl_out **uniforms, - struct v3d_texture_stateobj *texstate, - uint32_t data) -{ - /* Extract the texture unit from the top bits, and the compiler's - * packed p1 from the bottom. - */ - uint32_t unit = data >> 5; - uint32_t p1 = data & 0x1f; - - struct pipe_sampler_view *psview = texstate->textures[unit]; - struct v3d_sampler_view *sview = v3d_sampler_view(psview); - - struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = { - .texture_state_record_base_address = texstate->texture_state[unit], - }; - - uint32_t packed; - V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect, - (uint8_t *)&packed, - &unpacked); - - cl_aligned_u32(uniforms, p1 | packed | sview->p1); -} - /** Writes the V3D 4.x TMU configuration parameter 0. 
*/ static void write_tmu_p0(struct v3d_job *job, @@ -328,11 +280,6 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job, &v3d->shaderimg[stage], data); break; - case QUNIFORM_TEXTURE_CONFIG_P1: - write_texture_p1(job, &uniforms, texstate, - data); - break; - case QUNIFORM_TEXRECT_SCALE_X: case QUNIFORM_TEXRECT_SCALE_Y: cl_aligned_u32(&uniforms, @@ -437,13 +384,7 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job, break; default: - assert(quniform_contents_is_texture_p0(uinfo->contents[i])); - - write_texture_p0(job, &uniforms, texstate, - uinfo->contents[i] - - QUNIFORM_TEXTURE_CONFIG_P0_0, - data); - break; + unreachable("Unknown QUNIFORM"); } #if 0 diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index 92dfdd9fc1d..0f3802fbf0a 100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -75,7 +75,7 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) job->tile_alloc = v3d_bo_alloc(v3d->screen, tile_alloc_size, "tile_alloc"); - uint32_t tsda_per_tile_size = v3d->screen->devinfo.ver >= 40 ? 256 : 64; + uint32_t tsda_per_tile_size = 256; job->tile_state = v3d_bo_alloc(v3d->screen, MAX2(job->num_layers, 1) * job->draw_tiles_y * @@ -83,7 +83,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) tsda_per_tile_size, "TSDA"); -#if V3D_VERSION >= 41 /* This must go before the binning mode configuration. It is * required for layered framebuffers to work. */ @@ -92,7 +91,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) config.number_of_layers = job->num_layers; } } -#endif assert(!job->msaa || !job->double_buffer); #if V3D_VERSION >= 71 @@ -113,7 +111,7 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) #endif -#if V3D_VERSION >= 40 && V3D_VERSION <= 42 +#if V3D_VERSION == 42 cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) { config.width_in_pixels = job->draw_width; config.height_in_pixels = job->draw_height; @@ -126,34 +124,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) config.maximum_bpp_of_all_render_targets = job->internal_bpp; } #endif -#if V3D_VERSION < 40 - /* "Binning mode lists start with a Tile Binning Mode Configuration - * item (120)" - * - * Part1 signals the end of binning config setup. - */ - cl_emit(&job->bcl, TILE_BINNING_MODE_CFG_PART2, config) { - config.tile_allocation_memory_address = - cl_address(job->tile_alloc, 0); - config.tile_allocation_memory_size = job->tile_alloc->size; - } - - cl_emit(&job->bcl, TILE_BINNING_MODE_CFG_PART1, config) { - config.tile_state_data_array_base_address = - cl_address(job->tile_state, 0); - - config.width_in_tiles = job->draw_tiles_x; - config.height_in_tiles = job->draw_tiles_y; - /* Must be >= 1 */ - config.number_of_render_targets = - MAX2(job->nr_cbufs, 1); - - config.multisample_mode_4x = job->msaa; - config.double_buffer_in_non_ms_mode = job->double_buffer; - - config.maximum_bpp_of_all_render_targets = job->internal_bpp; - } -#endif /* There's definitely nothing in the VCD cache we want. 
*/ cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); @@ -380,7 +350,6 @@ v3d_emit_wait_for_tf_if_needed(struct v3d_context *v3d, struct v3d_job *job) } } -#if V3D_VERSION >= 41 static void v3d_emit_gs_state_record(struct v3d_job *job, struct v3d_compiled_shader *gs_bin, @@ -396,7 +365,7 @@ v3d_emit_gs_state_record(struct v3d_job *job, gs_bin->prog_data.gs->base.threads == 4; shader.geometry_bin_mode_shader_start_in_final_thread_section = gs_bin->prog_data.gs->base.single_seg; -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 shader.geometry_bin_mode_shader_propagate_nans = true; #endif shader.geometry_bin_mode_shader_uniforms_address = @@ -408,7 +377,7 @@ v3d_emit_gs_state_record(struct v3d_job *job, gs->prog_data.gs->base.threads == 4; shader.geometry_render_mode_shader_start_in_final_thread_section = gs->prog_data.gs->base.single_seg; -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 shader.geometry_render_mode_shader_propagate_nans = true; #endif shader.geometry_render_mode_shader_uniforms_address = @@ -500,7 +469,6 @@ v3d_emit_tes_gs_shader_params(struct v3d_job *job, shader.gbg_min_gs_output_segments_required_in_play = 1; } } -#endif static void v3d_emit_gl_shader_state(struct v3d_context *v3d, @@ -559,14 +527,12 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, uint32_t shader_state_record_length = cl_packet_length(GL_SHADER_STATE_RECORD); -#if V3D_VERSION >= 41 if (v3d->prog.gs) { shader_state_record_length += cl_packet_length(GEOMETRY_SHADER_STATE_RECORD) + cl_packet_length(TESSELLATION_GEOMETRY_COMMON_PARAMS) + 2 * cl_packet_length(TESSELLATION_GEOMETRY_SHADER_PARAMS); } -#endif /* See GFXH-930 workaround below */ uint32_t shader_rec_offset = @@ -582,8 +548,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, */ struct vpm_config vpm_cfg_bin, vpm_cfg; - - assert(v3d->screen->devinfo.ver >= 41 || !v3d->prog.gs); v3d_compute_vpm_config(&v3d->screen->devinfo, v3d->prog.cs->prog_data.vs, v3d->prog.vs->prog_data.vs, @@ -593,7 +557,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, &vpm_cfg); if (v3d->prog.gs) { -#if V3D_VERSION >= 41 v3d_emit_gs_state_record(v3d->job, v3d->prog.gs_bin, gs_bin_uniforms, v3d->prog.gs, gs_uniforms); @@ -614,9 +577,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, vpm_cfg.gs_width, vpm_cfg.Gd, vpm_cfg.Gv); -#else - unreachable("No GS support pre-4.1"); -#endif } cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) { @@ -643,20 +603,16 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = v3d->prog.fs->prog_data.fs->uses_center_w; -#if V3D_VERSION >= 41 shader.any_shader_reads_hardware_written_primitive_id = (v3d->prog.gs && v3d->prog.gs->prog_data.gs->uses_pid) || v3d->prog.fs->prog_data.fs->uses_pid; shader.insert_primitive_id_as_first_varying_to_fragment_shader = !v3d->prog.gs && v3d->prog.fs->prog_data.fs->uses_pid; -#endif -#if V3D_VERSION >= 40 - shader.do_scoreboard_wait_on_first_thread_switch = + shader.do_scoreboard_wait_on_first_thread_switch = v3d->prog.fs->prog_data.fs->lock_scoreboard_on_first_thrsw; - shader.disable_implicit_point_line_varyings = + shader.disable_implicit_point_line_varyings = !v3d->prog.fs->prog_data.fs->uses_implicit_point_line_varyings; -#endif shader.number_of_varyings_in_fragment_shader = v3d->prog.fs->prog_data.fs->num_inputs; @@ -671,7 +627,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, cl_address(v3d_resource(v3d->prog.fs->resource)->bo, v3d->prog.fs->offset); -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 
shader.coordinate_shader_propagate_nans = true; shader.vertex_shader_propagate_nans = true; shader.fragment_shader_propagate_nans = true; @@ -711,7 +667,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, shader.vertex_shader_uniforms_address = vs_uniforms; shader.fragment_shader_uniforms_address = fs_uniforms; -#if V3D_VERSION >= 41 shader.min_coord_shader_input_segments_required_in_play = vpm_cfg_bin.As; shader.min_vertex_shader_input_segments_required_in_play = @@ -735,20 +690,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, v3d->prog.vs->prog_data.vs->base.single_seg; shader.fragment_shader_start_in_final_thread_section = v3d->prog.fs->prog_data.fs->base.single_seg; -#else - shader.coordinate_shader_4_way_threadable = - v3d->prog.cs->prog_data.vs->base.threads == 4; - shader.coordinate_shader_2_way_threadable = - v3d->prog.cs->prog_data.vs->base.threads == 2; - shader.vertex_shader_4_way_threadable = - v3d->prog.vs->prog_data.vs->base.threads == 4; - shader.vertex_shader_2_way_threadable = - v3d->prog.vs->prog_data.vs->base.threads == 2; - shader.fragment_shader_4_way_threadable = - v3d->prog.fs->prog_data.fs->base.threads == 4; - shader.fragment_shader_2_way_threadable = - v3d->prog.fs->prog_data.fs->base.threads == 2; -#endif shader.vertex_id_read_by_coordinate_shader = v3d->prog.cs->prog_data.vs->uses_vid; @@ -759,7 +700,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, shader.instance_id_read_by_vertex_shader = v3d->prog.vs->prog_data.vs->uses_iid; -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 shader.address_of_default_attribute_values = cl_address(v3d_resource(vtx->defaults)->bo, vtx->defaults_offset); @@ -802,9 +743,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, if (i == vtx->num_elements - 1 && !cs_loaded_any) { attr.number_of_values_read_by_coordinate_shader = 1; } -#if V3D_VERSION >= 41 attr.maximum_index = 0xffffff; -#endif } STATIC_ASSERT(sizeof(vtx->attrs) >= V3D_MAX_VS_INPUTS / 4 * size); } @@ -833,7 +772,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, vcm.number_of_16_vertex_batches_for_rendering = vpm_cfg.Vc; } -#if V3D_VERSION >= 41 if (v3d->prog.gs) { cl_emit(&job->bcl, GL_SHADER_STATE_INCLUDING_GS, state) { state.address = cl_address(job->indirect.bo, @@ -847,13 +785,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, state.number_of_attribute_arrays = num_elements_to_emit; } } -#else - assert(!v3d->prog.gs); - cl_emit(&job->bcl, GL_SHADER_STATE, state) { - state.address = cl_address(job->indirect.bo, shader_rec_offset); - state.number_of_attribute_arrays = num_elements_to_emit; - } -#endif v3d_bo_unreference(&cs_uniforms.bo); v3d_bo_unreference(&vs_uniforms.bo); @@ -1164,13 +1095,6 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, } uint32_t prim_tf_enable = 0; -#if V3D_VERSION < 40 - /* V3D 3.x: The HW only processes transform feedback on primitives - * with the flag set. 
- */ - if (v3d->streamout.num_targets) - prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS); -#endif v3d->prim_restart = info->primitive_restart; @@ -1194,20 +1118,14 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, } struct v3d_resource *rsc = v3d_resource(prsc); -#if V3D_VERSION >= 40 cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) { ib.address = cl_address(rsc->bo, 0); ib.size = rsc->bo->size; } -#endif if (indirect && indirect->buffer) { cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) { prim.index_type = ffs(info->index_size) - 1; -#if V3D_VERSION < 40 - prim.address_of_indices_list = - cl_address(rsc->bo, offset); -#endif /* V3D_VERSION < 40 */ prim.mode = hw_prim_type | prim_tf_enable; prim.enable_primitive_restarts = info->primitive_restart; @@ -1220,13 +1138,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, } else if (info->instance_count > 1) { cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) { prim.index_type = ffs(info->index_size) - 1; -#if V3D_VERSION >= 40 prim.index_offset = offset; -#else /* V3D_VERSION < 40 */ - prim.maximum_index = (1u << 31) - 1; /* XXX */ - prim.address_of_indices_list = - cl_address(rsc->bo, offset); -#endif /* V3D_VERSION < 40 */ prim.mode = hw_prim_type | prim_tf_enable; prim.enable_primitive_restarts = info->primitive_restart; @@ -1237,13 +1149,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) { prim.index_type = ffs(info->index_size) - 1; prim.length = draws[0].count; -#if V3D_VERSION >= 40 prim.index_offset = offset; -#else /* V3D_VERSION < 40 */ - prim.maximum_index = (1u << 31) - 1; /* XXX */ - prim.address_of_indices_list = - cl_address(rsc->bo, offset); -#endif /* V3D_VERSION < 40 */ prim.mode = hw_prim_type | prim_tf_enable; prim.enable_primitive_restarts = info->primitive_restart; } @@ -1361,7 +1267,6 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, v3d_flush(pctx); } -#if V3D_VERSION >= 41 #define V3D_CSD_CFG012_WG_COUNT_SHIFT 16 #define V3D_CSD_CFG012_WG_OFFSET_SHIFT 0 /* Allow this dispatch to start while the last one is still running. */ @@ -1563,7 +1468,6 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) v3d_bo_unreference(&uniforms.bo); v3d_bo_unreference(&v3d->compute_shared_memory); } -#endif /** * Implements gallium's clear() hook (glClear()) by drawing a pair of triangles. @@ -1607,7 +1511,7 @@ v3d_tlb_clear(struct v3d_job *job, unsigned buffers, * if it would be possible to need to emit a load of just one after * we've set up our TLB clears. This issue is fixed since V3D 4.3.18. 
*/ - if (v3d->screen->devinfo.ver <= 42 && + if (v3d->screen->devinfo.ver == 42 && buffers & PIPE_CLEAR_DEPTHSTENCIL && (buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf && @@ -1762,8 +1666,6 @@ v3dX(draw_init)(struct pipe_context *pctx) pctx->clear = v3d_clear; pctx->clear_render_target = v3d_clear_render_target; pctx->clear_depth_stencil = v3d_clear_depth_stencil; -#if V3D_VERSION >= 41 if (v3d_context(pctx)->screen->has_csd) pctx->launch_grid = v3d_launch_grid; -#endif } diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c index ee17b935e19..705b3b74969 100644 --- a/src/gallium/drivers/v3d/v3dx_emit.c +++ b/src/gallium/drivers/v3d/v3dx_emit.c @@ -78,172 +78,6 @@ v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one) } } -#if V3D_VERSION < 40 -static inline uint16_t -swizzled_border_color(const struct v3d_device_info *devinfo, - struct pipe_sampler_state *sampler, - struct v3d_sampler_view *sview, - int chan) -{ - const struct util_format_description *desc = - util_format_description(sview->base.format); - uint8_t swiz = chan; - - /* If we're doing swizzling in the sampler, then only rearrange the - * border color for the mismatch between the V3D texture format and - * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by - * the sampler's swizzle. - * - * For swizzling in the shader, we don't do any pre-swizzling of the - * border color. - */ - if (v3d_get_tex_return_size(devinfo, sview->base.format) != 32) - swiz = desc->swizzle[swiz]; - - switch (swiz) { - case PIPE_SWIZZLE_0: - return _mesa_float_to_half(0.0); - case PIPE_SWIZZLE_1: - return _mesa_float_to_half(1.0); - default: - return _mesa_float_to_half(sampler->border_color.f[swiz]); - } -} - -static void -emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex, - int i) -{ - struct v3d_job *job = v3d->job; - struct pipe_sampler_state *psampler = stage_tex->samplers[i]; - struct v3d_sampler_state *sampler = v3d_sampler_state(psampler); - struct pipe_sampler_view *psview = stage_tex->textures[i]; - struct v3d_sampler_view *sview = v3d_sampler_view(psview); - struct pipe_resource *prsc = psview->texture; - struct v3d_resource *rsc = v3d_resource(prsc); - const struct v3d_device_info *devinfo = &v3d->screen->devinfo; - - stage_tex->texture_state[i].offset = - v3d_cl_ensure_space(&job->indirect, - cl_packet_length(TEXTURE_SHADER_STATE), - 32); - v3d_bo_set_reference(&stage_tex->texture_state[i].bo, - job->indirect.bo); - - uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format); - - struct V3D33_TEXTURE_SHADER_STATE unpacked = { - /* XXX */ - .border_color_red = swizzled_border_color(devinfo, psampler, - sview, 0), - .border_color_green = swizzled_border_color(devinfo, psampler, - sview, 1), - .border_color_blue = swizzled_border_color(devinfo, psampler, - sview, 2), - .border_color_alpha = swizzled_border_color(devinfo, psampler, - sview, 3), - - /* In the normal texturing path, the LOD gets clamped between - * min/max, and the base_level field (set in the sampler view - * from first_level) only decides where the min/mag switch - * happens, so we need to use the LOD clamps to keep us - * between min and max. - * - * For txf, the LOD clamp is still used, despite GL not - * wanting that. We will need to have a separate - * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to - * support txf properly. 
- */ - .min_level_of_detail = MIN2(psview->u.tex.first_level + - MAX2(psampler->min_lod, 0), - psview->u.tex.last_level), - .max_level_of_detail = MIN2(psview->u.tex.first_level + - MAX2(psampler->max_lod, - psampler->min_lod), - psview->u.tex.last_level), - - .texture_base_pointer = cl_address(rsc->bo, - rsc->slices[0].offset), - - .output_32_bit = return_size == 32, - }; - - /* Set up the sampler swizzle if we're doing 16-bit sampling. For - * 32-bit, we leave swizzling up to the shader compiler. - * - * Note: Contrary to the docs, the swizzle still applies even if the - * return size is 32. It's just that you probably want to swizzle in - * the shader, because you need the Y/Z/W channels to be defined. - */ - if (return_size == 32) { - unpacked.swizzle_r = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_X); - unpacked.swizzle_g = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Y); - unpacked.swizzle_b = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Z); - unpacked.swizzle_a = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_W); - } else { - unpacked.swizzle_r = v3d_translate_pipe_swizzle(sview->swizzle[0]); - unpacked.swizzle_g = v3d_translate_pipe_swizzle(sview->swizzle[1]); - unpacked.swizzle_b = v3d_translate_pipe_swizzle(sview->swizzle[2]); - unpacked.swizzle_a = v3d_translate_pipe_swizzle(sview->swizzle[3]); - } - - int min_img_filter = psampler->min_img_filter; - int min_mip_filter = psampler->min_mip_filter; - int mag_img_filter = psampler->mag_img_filter; - - if (return_size == 32) { - min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; - min_img_filter = PIPE_TEX_FILTER_NEAREST; - mag_img_filter = PIPE_TEX_FILTER_NEAREST; - } - - bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST; - switch (min_mip_filter) { - case PIPE_TEX_MIPFILTER_NONE: - unpacked.filter += min_nearest ? 2 : 0; - break; - case PIPE_TEX_MIPFILTER_NEAREST: - unpacked.filter += min_nearest ? 4 : 8; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - unpacked.filter += min_nearest ? 4 : 8; - unpacked.filter += 2; - break; - } - - if (mag_img_filter == PIPE_TEX_FILTER_NEAREST) - unpacked.filter++; - - if (psampler->max_anisotropy > 8) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1; - else if (psampler->max_anisotropy > 4) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1; - else if (psampler->max_anisotropy > 2) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1; - else if (psampler->max_anisotropy) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1; - - uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)]; - cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked); - - for (int i = 0; i < ARRAY_SIZE(packed); i++) - packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i]; - - /* TMU indirect structs need to be 32b aligned. */ - v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32); - cl_emit_prepacked(&job->indirect, &packed); -} - -static void -emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex) -{ - for (int i = 0; i < stage_tex->num_textures; i++) { - if (stage_tex->textures[i]) - emit_one_texture(v3d, stage_tex, i); - } -} -#endif /* V3D_VERSION < 40 */ - static uint32_t translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt) { @@ -263,18 +97,12 @@ emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job, { struct pipe_rt_blend_state *rtblend = &blend->rt[rt]; -#if V3D_VERSION >= 40 /* We don't need to emit blend state for disabled RTs. 
*/ if (!rtblend->blend_enable) return; -#endif cl_emit(&job->bcl, BLEND_CFG, config) { -#if V3D_VERSION >= 40 config.render_target_mask = rt_mask; -#else - assert(rt == 0); -#endif config.color_blend_mode = rtblend->rgb_func; config.color_blend_dst_factor = @@ -311,7 +139,6 @@ emit_flat_shade_flags(struct v3d_job *job, } } -#if V3D_VERSION >= 40 static void emit_noperspective_flags(struct v3d_job *job, int varying_offset, @@ -345,7 +172,6 @@ emit_centroid_flags(struct v3d_job *job, higher; } } -#endif /* V3D_VERSION >= 40 */ static bool emit_varying_flags(struct v3d_job *job, uint32_t *flags, @@ -433,14 +259,6 @@ v3dX(emit_state)(struct pipe_context *pctx) if (maxx > minx && maxy > miny) { clip.clip_window_width_in_pixels = maxx - minx; clip.clip_window_height_in_pixels = maxy - miny; - } else if (V3D_VERSION < 41) { - /* The HW won't entirely clip out when scissor - * w/h is 0. Just treat it the same as - * rasterizer discard. - */ - rasterizer_discard = true; - clip.clip_window_width_in_pixels = 1; - clip.clip_window_height_in_pixels = 1; } } @@ -512,14 +330,14 @@ v3dX(emit_state)(struct pipe_context *pctx) /* Note: EZ state may update based on the compiled FS, * along with ZSA */ -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 config.early_z_updates_enable = (job->ez_state != V3D_EZ_DISABLED); #endif if (v3d->zsa->base.depth_enabled) { config.z_updates_enable = v3d->zsa->base.depth_writemask; -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 config.early_z_enable = config.early_z_updates_enable; #endif @@ -559,7 +377,7 @@ v3dX(emit_state)(struct pipe_context *pctx) if (v3d->dirty & V3D_DIRTY_RASTERIZER && v3d->rasterizer->base.offset_tri) { - if (v3d->screen->devinfo.ver <= 42 && + if (v3d->screen->devinfo.ver == 42 && job->zsbuf && job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) { cl_emit_prepacked_sized(&job->bcl, @@ -583,7 +401,7 @@ v3dX(emit_state)(struct pipe_context *pctx) } if (v3d->dirty & V3D_DIRTY_VIEWPORT) { -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { clip.viewport_half_width_in_1_256th_of_pixel = v3d->viewport.scale[0] * 256.0f; @@ -617,12 +435,6 @@ v3dX(emit_state)(struct pipe_context *pctx) } cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { -#if V3D_VERSION < 41 - vp.viewport_centre_x_coordinate = - v3d->viewport.translate[0]; - vp.viewport_centre_y_coordinate = - v3d->viewport.translate[1]; -#else float vp_fine_x = v3d->viewport.translate[0]; float vp_fine_y = v3d->viewport.translate[1]; int32_t vp_coarse_x = 0; @@ -649,7 +461,6 @@ v3dX(emit_state)(struct pipe_context *pctx) vp.fine_y = vp_fine_y; vp.coarse_x = vp_coarse_x; vp.coarse_y = vp_coarse_y; -#endif } } @@ -657,11 +468,9 @@ v3dX(emit_state)(struct pipe_context *pctx) struct v3d_blend_state *blend = v3d->blend; if (blend->blend_enables) { -#if V3D_VERSION >= 40 cl_emit(&job->bcl, BLEND_ENABLES, enables) { enables.mask = blend->blend_enables; } -#endif const uint32_t max_rts = V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver); @@ -716,8 +525,7 @@ v3dX(emit_state)(struct pipe_context *pctx) /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant * color. */ - if (v3d->dirty & V3D_DIRTY_BLEND_COLOR || - (V3D_VERSION < 41 && (v3d->dirty & V3D_DIRTY_BLEND))) { + if (v3d->dirty & V3D_DIRTY_BLEND_COLOR) { cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) { color.red_f16 = (v3d->swap_color_rb ? 
v3d->blend_color.hf[2] : @@ -751,20 +559,6 @@ v3dX(emit_state)(struct pipe_context *pctx) } } -#if V3D_VERSION < 40 - /* Pre-4.x, we have texture state that depends on both the sampler and - * the view, so we merge them together at draw time. - */ - if (v3d->dirty & V3D_DIRTY_FRAGTEX) - emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]); - - if (v3d->dirty & V3D_DIRTY_GEOMTEX) - emit_textures(v3d, &v3d->tex[PIPE_SHADER_GEOMETRY]); - - if (v3d->dirty & V3D_DIRTY_VERTTEX) - emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]); -#endif - if (v3d->dirty & V3D_DIRTY_FLAT_SHADE_FLAGS) { if (!emit_varying_flags(job, v3d->prog.fs->prog_data.fs->flat_shade_flags, @@ -773,7 +567,6 @@ v3dX(emit_state)(struct pipe_context *pctx) } } -#if V3D_VERSION >= 40 if (v3d->dirty & V3D_DIRTY_NOPERSPECTIVE_FLAGS) { if (!emit_varying_flags(job, v3d->prog.fs->prog_data.fs->noperspective_flags, @@ -789,7 +582,6 @@ v3dX(emit_state)(struct pipe_context *pctx) cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags); } } -#endif /* Set up the transform feedback data specs (which VPM entries to * output to which buffers). @@ -807,7 +599,6 @@ v3dX(emit_state)(struct pipe_context *pctx) tf_shader->tf_specs_psiz : tf_shader->tf_specs); -#if V3D_VERSION >= 40 bool tf_enabled = v3d_transform_feedback_enabled(v3d); job->tf_enabled |= tf_enabled; @@ -816,23 +607,13 @@ v3dX(emit_state)(struct pipe_context *pctx) tf_shader->num_tf_specs; tfe.enable = tf_enabled; }; -#else /* V3D_VERSION < 40 */ - cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) { - tfe.number_of_32_bit_output_buffer_address_following = - so->num_targets; - tfe.number_of_16_bit_output_data_specs_following = - tf_shader->num_tf_specs; - }; -#endif /* V3D_VERSION < 40 */ for (int i = 0; i < tf_shader->num_tf_specs; i++) { cl_emit_prepacked(&job->bcl, &tf_specs[i]); } } else { -#if V3D_VERSION >= 40 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { tfe.enable = false; }; -#endif /* V3D_VERSION >= 40 */ } } @@ -850,7 +631,6 @@ v3dX(emit_state)(struct pipe_context *pctx) uint32_t offset = target ? v3d_stream_output_target(target)->offset * info->stride[i] * 4 : 0; -#if V3D_VERSION >= 40 if (!target) continue; @@ -863,16 +643,6 @@ v3dX(emit_state)(struct pipe_context *pctx) (target->buffer_size - offset) >> 2; output.buffer_number = i; } -#else /* V3D_VERSION < 40 */ - cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) { - if (target) { - output.address = - cl_address(rsc->bo, - target->buffer_offset + - offset); - } - }; -#endif /* V3D_VERSION < 40 */ if (target) { v3d_job_add_tf_write_resource(v3d->job, target->buffer); @@ -889,7 +659,6 @@ v3dX(emit_state)(struct pipe_context *pctx) } } -#if V3D_VERSION >= 40 if (v3d->dirty & V3D_DIRTY_SAMPLE_STATE) { cl_emit(&job->bcl, SAMPLE_STATE, state) { /* Note: SampleCoverage was handled at the @@ -899,5 +668,4 @@ v3dX(emit_state)(struct pipe_context *pctx) state.mask = job->msaa ? 
v3d->sample_mask : 0xf; } } -#endif } diff --git a/src/gallium/drivers/v3d/v3dx_format_table.c b/src/gallium/drivers/v3d/v3dx_format_table.c index 78f6d955be3..76b1074cb62 100644 --- a/src/gallium/drivers/v3d/v3dx_format_table.c +++ b/src/gallium/drivers/v3d/v3dx_format_table.c @@ -145,7 +145,6 @@ static const struct v3d_format format_table[] = { FORMAT(R11G11B10_FLOAT, R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZ1, 16, 0), FORMAT(R9G9B9E5_FLOAT, NO, RGB9_E5, SWIZ_XYZ1, 16, 0), -#if V3D_VERSION >= 40 FORMAT(S8_UINT_Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), FORMAT(X8Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), FORMAT(S8X24_UINT, S8, RGBA8UI, SWIZ_XXXX, 16, 1), @@ -155,16 +154,6 @@ static const struct v3d_format format_table[] = { /* Pretend we support this, but it'll be separate Z32F depth and S8. */ FORMAT(Z32_FLOAT_S8X24_UINT, D32F, DEPTH_COMP32F, SWIZ_XXXX, 32, 1), FORMAT(X32_S8X24_UINT, S8, R8UI, SWIZ_XXXX, 16, 1), -#else - FORMAT(S8_UINT_Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), - FORMAT(X8Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), - FORMAT(S8X24_UINT, NO, R32F, SWIZ_XXXX, 32, 1), - FORMAT(Z32_FLOAT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1), - FORMAT(Z16_UNORM, ZS_DEPTH_COMPONENT16, DEPTH_COMP16, SWIZ_XXXX, 32, 1), - - /* Pretend we support this, but it'll be separate Z32F depth and S8. */ - FORMAT(Z32_FLOAT_S8X24_UINT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1), -#endif FORMAT(ETC2_RGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), FORMAT(ETC2_SRGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), @@ -233,9 +222,6 @@ v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, { switch (format) { case V3D_OUTPUT_IMAGE_FORMAT_RGBA8: -#if V3D_VERSION < 41 - case V3D_OUTPUT_IMAGE_FORMAT_RGBX8: -#endif case V3D_OUTPUT_IMAGE_FORMAT_RGB8: case V3D_OUTPUT_IMAGE_FORMAT_RG8: case V3D_OUTPUT_IMAGE_FORMAT_R8: @@ -264,9 +250,6 @@ v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, case V3D_OUTPUT_IMAGE_FORMAT_SRGB: case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2: case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F: -#if V3D_VERSION < 41 - case V3D_OUTPUT_IMAGE_FORMAT_SRGBX8: -#endif case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F: /* Note that sRGB RTs are stored in the tile buffer at 16F, * and the conversion to sRGB happens at tilebuffer diff --git a/src/gallium/drivers/v3d/v3dx_job.c b/src/gallium/drivers/v3d/v3dx_job.c index d48c97f99ff..391c1383971 100644 --- a/src/gallium/drivers/v3d/v3dx_job.c +++ b/src/gallium/drivers/v3d/v3dx_job.c @@ -34,9 +34,7 @@ void v3dX(bcl_epilogue)(struct v3d_context *v3d, struct v3d_job *job) { v3d_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(PRIMITIVE_COUNTS_FEEDBACK) + -#if V3D_VERSION >= 41 cl_packet_length(TRANSFORM_FEEDBACK_SPECS) + -#endif cl_packet_length(FLUSH)); if (job->tf_enabled || job->needs_primitives_generated) { @@ -57,13 +55,11 @@ void v3dX(bcl_epilogue)(struct v3d_context *v3d, struct v3d_job *job) * cleans up and finishes before it gets reset by the next * frame's tile binning mode cfg packet. (SWVC5-718). */ -#if V3D_VERSION >= 41 if (job->tf_enabled) { cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { tfe.enable = false; }; } -#endif /* V3D_VERSION >= 41 */ /* We just FLUSH here to tell the HW to cap the bin CLs with a * return. 
Any remaining state changes won't be flushed to diff --git a/src/gallium/drivers/v3d/v3dx_rcl.c b/src/gallium/drivers/v3d/v3dx_rcl.c index 8bac9739e29..2efe70b7b42 100644 --- a/src/gallium/drivers/v3d/v3dx_rcl.c +++ b/src/gallium/drivers/v3d/v3dx_rcl.c @@ -36,23 +36,6 @@ #define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1) -/* The HW queues up the load until the tile coordinates show up, but can only - * track one at a time. If we need to do more than one load, then we need to - * flush out the previous load by emitting the tile coordinates and doing a - * dummy store. - */ -static void -flush_last_load(struct v3d_cl *cl) -{ - if (V3D_VERSION >= 40) - return; - - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } -} - static void load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer, int layer, uint32_t pipe_bit, uint32_t *loads_pending) @@ -73,7 +56,6 @@ load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer, load.buffer_to_load = buffer; load.address = cl_address(rsc->bo, layer_offset); -#if V3D_VERSION >= 40 load.memory_format = surf->tiling; if (separate_stencil) load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; @@ -96,20 +78,9 @@ load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer, else load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; -#else /* V3D_VERSION < 40 */ - /* Can't do raw ZSTENCIL loads -- need to load/store them to - * separate buffers for Z and stencil. - */ - assert(buffer != ZSTENCIL); - load.raw_mode = true; - load.padded_height_of_output_image_in_uif_blocks = - surf->padded_height_of_output_image_in_uif_blocks; -#endif /* V3D_VERSION < 40 */ } *loads_pending &= ~pipe_bit; - if (*loads_pending) - flush_last_load(cl); } static void @@ -127,7 +98,6 @@ store_general(struct v3d_job *job, } *stores_pending &= ~pipe_bit; - bool last_store = !(*stores_pending); struct v3d_resource *rsc = v3d_resource(psurf->texture); @@ -140,7 +110,6 @@ store_general(struct v3d_job *job, store.buffer_to_store = buffer; store.address = cl_address(rsc->bo, layer_offset); -#if V3D_VERSION >= 40 store.clear_buffer_being_stored = false; if (separate_stencil) @@ -168,35 +137,6 @@ store_general(struct v3d_job *job, store.decimate_mode = V3D_DECIMATE_MODE_4X; else store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; - -#else /* V3D_VERSION < 40 */ - /* Can't do raw ZSTENCIL stores -- need to load/store them to - * separate buffers for Z and stencil. - */ - assert(buffer != ZSTENCIL); - store.raw_mode = true; - if (!last_store) { - store.disable_color_buffers_clear_on_write = true; - store.disable_z_buffer_clear_on_write = true; - store.disable_stencil_buffer_clear_on_write = true; - } else { - store.disable_color_buffers_clear_on_write = - !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) && - general_color_clear && - (job->clear & pipe_bit))); - store.disable_z_buffer_clear_on_write = - !(job->clear & PIPE_CLEAR_DEPTH); - store.disable_stencil_buffer_clear_on_write = - !(job->clear & PIPE_CLEAR_STENCIL); - } - store.padded_height_of_output_image_in_uif_blocks = - surf->padded_height_of_output_image_in_uif_blocks; -#endif /* V3D_VERSION < 40 */ - } - - /* There must be a TILE_COORDINATES_IMPLICIT between each store. 
*/ - if (V3D_VERSION < 40 && !last_store) { - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); } } @@ -223,7 +163,6 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer) */ assert(!job->bbuf || job->load == 0); assert(!job->bbuf || job->nr_cbufs <= 1); - assert(!job->bbuf || V3D_VERSION >= 40); uint32_t loads_pending = job->bbuf ? job->store : job->load; @@ -235,18 +174,14 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer) struct pipe_surface *psurf = job->bbuf ? job->bbuf : job->cbufs[i]; assert(!job->bbuf || i == 0); - if (!psurf || (V3D_VERSION < 40 && - psurf->texture->nr_samples <= 1)) { + if (!psurf) continue; - } load_general(cl, psurf, RENDER_TARGET_0 + i, layer, bit, &loads_pending); } - if ((loads_pending & PIPE_CLEAR_DEPTHSTENCIL) && - (V3D_VERSION >= 40 || - (job->zsbuf && job->zsbuf->texture->nr_samples > 1))) { + if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) { assert(!job->early_zs_clear); struct pipe_surface *src = job->bbuf ? job->bbuf : job->zsbuf; struct v3d_resource *rsc = v3d_resource(src->texture); @@ -268,57 +203,14 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer) } } -#if V3D_VERSION < 40 - /* The initial reload will be queued until we get the - * tile coordinates. - */ - if (loads_pending) { - cl_emit(cl, RELOAD_TILE_COLOR_BUFFER, load) { - load.disable_color_buffer_load = - (~loads_pending & - PIPE_CLEAR_COLOR_BUFFERS) >> - PIPE_FIRST_COLOR_BUFFER_BIT; - load.enable_z_load = - loads_pending & PIPE_CLEAR_DEPTH; - load.enable_stencil_load = - loads_pending & PIPE_CLEAR_STENCIL; - } - } -#else /* V3D_VERSION >= 40 */ assert(!loads_pending); cl_emit(cl, END_OF_LOADS, end); -#endif } static void v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer) { -#if V3D_VERSION < 40 - UNUSED bool needs_color_clear = job->clear & PIPE_CLEAR_COLOR_BUFFERS; - UNUSED bool needs_z_clear = job->clear & PIPE_CLEAR_DEPTH; - UNUSED bool needs_s_clear = job->clear & PIPE_CLEAR_STENCIL; - - /* For clearing color in a TLB general on V3D 3.3: - * - * - NONE buffer store clears all TLB color buffers. - * - color buffer store clears just the TLB color buffer being stored. - * - Z/S buffers store may not clear the TLB color buffer. - * - * And on V3D 4.1, we only have one flag for "clear the buffer being - * stored" in the general packet, and a separate packet to clear all - * color TLB buffers. - * - * As a result, we only bother flagging TLB color clears in a general - * packet when we don't have to emit a separate packet to clear all - * TLB color buffers. - */ - bool general_color_clear = (needs_color_clear && - (job->clear & PIPE_CLEAR_COLOR_BUFFERS) == - (job->store & PIPE_CLEAR_COLOR_BUFFERS)); -#else bool general_color_clear = false; -#endif - uint32_t stores_pending = job->store; /* For V3D 4.1, use general stores for all TLB stores. 
@@ -337,17 +229,14 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
                         continue;

                 struct pipe_surface *psurf = job->cbufs[i];
-                if (!psurf ||
-                    (V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) {
+                if (!psurf)
                         continue;
-                }

                 store_general(job, cl, psurf, layer, RENDER_TARGET_0 + i, bit,
                               &stores_pending, general_color_clear, job->bbuf);
         }

-        if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf &&
-            !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) {
+        if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf) {
                 assert(!job->early_zs_clear);
                 struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                 if (rsc->separate_stencil) {
@@ -375,35 +264,7 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
                 }
         }

-#if V3D_VERSION < 40
-        if (stores_pending) {
-                cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
-                        store.disable_color_buffer_write =
-                                (~stores_pending >>
-                                 PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf;
-                        store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH;
-                        store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL;
-
-                        /* Note that when set this will clear all of the color
-                         * buffers.
-                         */
-                        store.disable_color_buffers_clear_on_write =
-                                !needs_color_clear;
-                        store.disable_z_buffer_clear_on_write =
-                                !needs_z_clear;
-                        store.disable_stencil_buffer_clear_on_write =
-                                !needs_s_clear;
-                };
-        } else if (needs_color_clear && !general_color_clear) {
-                /* If we didn't do our color clears in the general packet,
-                 * then emit a packet to clear all the TLB color buffers now.
-                 */
-                cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
-                        store.buffer_to_store = NONE;
-                }
-        }
-#else /* V3D_VERSION >= 40 */
         /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments,
          * we still need to emit some sort of store.
          */
@@ -421,7 +282,7 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
          * clearing Z/S.
          */
         if (job->clear) {
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                 cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
                         clear.clear_z_stencil_buffer = !job->early_zs_clear;
                         clear.clear_all_render_targets = true;
@@ -432,7 +293,6 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
 #endif
         }
-#endif /* V3D_VERSION >= 40 */
 }

 static void
@@ -445,22 +305,13 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
         v3d_cl_ensure_space(cl, 200, 1);
         struct v3d_cl_reloc tile_list_start = cl_get_address(cl);

-        if (V3D_VERSION >= 40) {
-                /* V3D 4.x only requires a single tile coordinates, and
-                 * END_OF_LOADS switches us between loading and rendering.
-                 */
-                cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-        }
+        /* V3D 4.x/7.x only requires a single tile coordinates, and
+         * END_OF_LOADS switches us between loading and rendering.
+         */
+        cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

         v3d_rcl_emit_loads(job, cl, layer);

-        if (V3D_VERSION < 40) {
-                /* Tile Coordinates triggers the last reload and sets where
-                 * the stores go. There must be one per store packet.
-                 */
-                cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-        }
-
         /* The binner starts out writing tiles assuming that the initial mode
          * is triangles, so make sure that's the case.
          */
@@ -468,20 +319,16 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
                 fmt.primitive_type = LIST_TRIANGLES;
         }

-#if V3D_VERSION >= 41
         /* PTB assumes that value to be 0, but hw will not set it. */
         cl_emit(cl, SET_INSTANCEID, set) {
                 set.instance_id = 0;
         }
-#endif

         cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

         v3d_rcl_emit_stores(job, cl, layer);

-#if V3D_VERSION >= 40
         cl_emit(cl, END_OF_TILE_MARKER, end);
-#endif

         cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
@@ -491,7 +338,6 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
         }
 }

-#if V3D_VERSION > 33
 /* Note that for v71, render target cfg packets has just one field that
  * combined the internal type and clamp mode. For simplicity we keep just one
  * helper.
@@ -503,13 +349,11 @@ static uint32_t
 v3dX(clamp_for_format_and_type)(uint32_t rt_type,
                                 enum pipe_format format)
 {
-#if V3D_VERSION >= 40 && V3D_VERSION <= 42
+#if V3D_VERSION == 42
         if (util_format_is_srgb(format)) {
                 return V3D_RENDER_TARGET_CLAMP_NORM;
-#if V3D_VERSION >= 42
         } else if (util_format_is_pure_integer(format)) {
                 return V3D_RENDER_TARGET_CLAMP_INT;
-#endif
         } else {
                 return V3D_RENDER_TARGET_CLAMP_NONE;
         }
@@ -541,9 +385,8 @@ v3dX(clamp_for_format_and_type)(uint32_t rt_type,
         }
         return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID;
 #endif
-        return 0;
+        unreachable("Wrong V3D_VERSION");
 }
-#endif

 #if V3D_VERSION >= 71
 static void
@@ -566,7 +409,7 @@ v3d_setup_render_target(struct v3d_job *job,
 }
 #endif

-#if V3D_VERSION >= 40 && V3D_VERSION <= 42
+#if V3D_VERSION == 42
 static void
 v3d_setup_render_target(struct v3d_job *job,
                         int cbuf,
@@ -589,36 +432,6 @@ v3d_setup_render_target(struct v3d_job *job,
 }
 #endif

-#if V3D_VERSION < 40
-static void
-v3d_emit_z_stencil_config(struct v3d_job *job, struct v3d_surface *surf,
-                          struct v3d_resource *rsc, bool is_separate_stencil)
-{
-        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_Z_STENCIL, zs) {
-                zs.address = cl_address(rsc->bo, surf->offset);
-
-                if (!is_separate_stencil) {
-                        zs.internal_type = surf->internal_type;
-                        zs.output_image_format = surf->format;
-                } else {
-                        zs.z_stencil_id = 1; /* Separate stencil */
-                }
-
-                zs.padded_height_of_output_image_in_uif_blocks =
-                        surf->padded_height_of_output_image_in_uif_blocks;
-
-                assert(surf->tiling != V3D_TILING_RASTER);
-                zs.memory_format = surf->tiling;
-        }
-
-        if (job->store & (is_separate_stencil ?
-                          PIPE_CLEAR_STENCIL :
-                          PIPE_CLEAR_DEPTHSTENCIL)) {
-                rsc->writes++;
-        }
-}
-#endif /* V3D_VERSION < 40 */
-
 static bool
 supertile_in_job_scissors(struct v3d_job *job,
                           uint32_t x, uint32_t y, uint32_t w, uint32_t h)
@@ -648,7 +461,6 @@ supertile_in_job_scissors(struct v3d_job *job,
         return false;
 }

-#if V3D_VERSION >= 40
 static inline bool
 do_double_initial_tile_clear(const struct v3d_job *job)
 {
@@ -663,7 +475,6 @@ do_double_initial_tile_clear(const struct v3d_job *job)
         return job->double_buffer &&
                (job->draw_tiles_x > 1 || job->draw_tiles_y > 1);
 }
-#endif

 static void
 emit_render_layer(struct v3d_job *job, uint32_t layer)
@@ -730,12 +541,6 @@ emit_render_layer(struct v3d_job *job, uint32_t layer)
          * state, we need 1 dummy store in between internal type/size
          * changes on V3D 3.x, and 2 dummy stores on 4.x.
          */
-#if V3D_VERSION < 40
-        cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
-                store.buffer_to_store = NONE;
-        }
-#endif
-#if V3D_VERSION >= 40
         for (int i = 0; i < 2; i++) {
                 if (i > 0)
                         cl_emit(&job->rcl, TILE_COORDINATES, coords);
@@ -756,7 +561,6 @@ emit_render_layer(struct v3d_job *job, uint32_t layer)
                 }
                 cl_emit(&job->rcl, END_OF_TILE_MARKER, end);
         }
-#endif

         cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);

         v3d_rcl_emit_generic_per_tile_list(job, layer);
@@ -808,15 +612,10 @@ v3dX(emit_rcl)(struct v3d_job *job)
          * optional updates to the previous HW state.
          */
         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
-#if V3D_VERSION < 40
-                config.enable_z_store = job->store & PIPE_CLEAR_DEPTH;
-                config.enable_stencil_store = job->store & PIPE_CLEAR_STENCIL;
-#else /* V3D_VERSION >= 40 */
                 if (job->zsbuf) {
                         struct v3d_surface *surf = v3d_surface(job->zsbuf);
                         config.internal_depth_type = surf->internal_type;
                 }
-#endif /* V3D_VERSION >= 40 */

                 if (job->decided_global_ez_enable) {
                         switch (job->first_ez_state) {
@@ -839,7 +638,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
                         config.early_z_disable = true;
                 }

-#if V3D_VERSION >= 40
                 assert(job->zsbuf || config.early_z_disable);

                 job->early_zs_clear = (job->clear & PIPE_CLEAR_DEPTHSTENCIL) &&
@@ -847,7 +645,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
                         !(job->store & PIPE_CLEAR_DEPTHSTENCIL);

                 config.early_depth_stencil_clear = job->early_zs_clear;
-#endif /* V3D_VERSION >= 40 */

                 config.image_width_pixels = job->draw_width;
                 config.image_height_pixels = job->draw_height;
@@ -858,7 +655,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
                 config.multisample_mode_4x = job->msaa;
                 config.double_buffer_in_non_ms_mode = job->double_buffer;

-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                 config.maximum_bpp_of_all_render_targets = job->internal_bpp;
 #endif
 #if V3D_VERSION >= 71
@@ -921,22 +718,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
                         }
                 }

-#if V3D_VERSION < 40
-                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
-                        rt.address = cl_address(rsc->bo, surf->offset);
-                        rt.internal_type = surf->internal_type;
-                        rt.output_image_format = surf->format;
-                        rt.memory_format = surf->tiling;
-                        rt.internal_bpp = surf->internal_bpp;
-                        rt.render_target_number = i;
-                        rt.pad = config_pad;
-
-                        if (job->store & PIPE_CLEAR_COLOR0 << i)
-                                rsc->writes++;
-                }
-#endif /* V3D_VERSION < 40 */
-
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1,
                         clear) {
                         clear.clear_color_low_32_bits = job->clear_color[i][0];
@@ -1000,7 +782,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
 #endif
         }

-#if V3D_VERSION >= 40 && V3D_VERSION <= 42
+#if V3D_VERSION == 42
         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
                 v3d_setup_render_target(job, 0,
                                         &rt.render_target_0_internal_bpp,
@@ -1021,27 +803,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
         }
 #endif

-#if V3D_VERSION < 40
-        /* FIXME: Don't bother emitting if we don't load/clear Z/S. */
-        if (job->zsbuf) {
-                struct pipe_surface *psurf = job->zsbuf;
-                struct v3d_surface *surf = v3d_surface(psurf);
-                struct v3d_resource *rsc = v3d_resource(psurf->texture);
-
-                v3d_emit_z_stencil_config(job, surf, rsc, false);
-
-                /* Emit the separate stencil packet if we have a resource for
-                 * it. The HW will only load/store this buffer if the
-                 * Z/Stencil config doesn't have stencil in its format.
-                 */
-                if (surf->separate_stencil) {
-                        v3d_emit_z_stencil_config(job,
-                                                  v3d_surface(surf->separate_stencil),
-                                                  rsc->separate_stencil, true);
-                }
-        }
-#endif /* V3D_VERSION < 40 */
-
         /* Ends rendering mode config. */
         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES,
                 clear) {
diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c
index 6a72812ecc3..f2b2b2dab66 100644
--- a/src/gallium/drivers/v3d/v3dx_state.c
+++ b/src/gallium/drivers/v3d/v3dx_state.c
@@ -106,21 +106,17 @@ v3d_create_rasterizer_state(struct pipe_context *pctx,
         v3dx_pack(&so->depth_offset, DEPTH_OFFSET, depth) {
                 depth.depth_offset_factor = cso->offset_scale;
                 depth.depth_offset_units = cso->offset_units;
-#if V3D_VERSION >= 41
                 depth.limit = cso->offset_clamp;
-#endif
         }

         /* V3d 4.x treats polygon offset units based on a Z24 buffer, so we
          * need to scale up offset_units if we're only Z16.
          */
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
         v3dx_pack(&so->depth_offset_z16, DEPTH_OFFSET, depth) {
                 depth.depth_offset_factor = cso->offset_scale;
                 depth.depth_offset_units = cso->offset_units * 256.0;
-#if V3D_VERSION >= 41
                 depth.limit = cso->offset_clamp;
-#endif
         }
 #endif
@@ -144,10 +140,6 @@ v3d_create_blend_state(struct pipe_context *pctx,
         if (cso->independent_blend_enable) {
                 for (int i = 0; i < max_rts; i++) {
                         so->blend_enables |= cso->rt[i].blend_enable << i;
-
-                        /* V3D 4.x is when we got independent blend enables. */
-                        assert(V3D_VERSION >= 40 ||
-                               cso->rt[i].blend_enable == cso->rt[0].blend_enable);
                 }
         } else {
                 if (cso->rt[0].blend_enable)
@@ -343,7 +335,7 @@ v3d_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
 static bool
 needs_default_attribute_values(void)
 {
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
         /* FIXME: on vulkan we are able to refine even further, as we know in
          * advance when we create the pipeline if we have an integer vertex
         * attrib. Pending to check if we could do something similar here.
@@ -517,18 +509,10 @@ v3d_set_framebuffer_state(struct pipe_context *pctx,
                 struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i];
                 if (!cbuf)
                         continue;
-                struct v3d_surface *v3d_cbuf = v3d_surface(cbuf);

                 const struct util_format_description *desc =
                         util_format_description(cbuf->format);

-                /* For BGRA8 formats (DRI window system default format), we
-                 * need to swap R and B, since the HW's format is RGBA8. On
-                 * V3D 4.1+, the RCL can swap R and B on load/store.
-                 */
-                if (v3d->screen->devinfo.ver < 41 && v3d_cbuf->swap_rb)
-                        v3d->swap_color_rb |= 1 << i;
-
                 if (desc->swizzle[3] == PIPE_SWIZZLE_1)
                         v3d->blend_dst_alpha_one |= 1 << i;
         }
@@ -555,7 +539,6 @@ translate_wrap(uint32_t pipe_wrap)
         }
 }

-#if V3D_VERSION >= 40
 static void
 v3d_upload_sampler_state_variant(void *map,
                                  const struct pipe_sampler_state *cso,
@@ -720,7 +703,7 @@ v3d_upload_sampler_state_variant(void *map,
                 break;
         }

-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
         /* The TMU in V3D 7.x always takes 32-bit floats and handles conversions
          * for us. In V3D 4.x we need to manually convert floating point color
          * values to the expected format.
@@ -739,7 +722,6 @@ v3d_upload_sampler_state_variant(void *map,
                 }
         }
 }
-#endif

 static void *
 v3d_create_sampler_state(struct pipe_context *pctx,
@@ -757,7 +739,6 @@ v3d_create_sampler_state(struct pipe_context *pctx,
         enum V3DX(Wrap_Mode) wrap_t = translate_wrap(cso->wrap_t);
         enum V3DX(Wrap_Mode) wrap_r = translate_wrap(cso->wrap_r);

-#if V3D_VERSION >= 40
         bool uses_border_color = (wrap_s == V3D_WRAP_MODE_BORDER ||
                                   wrap_t == V3D_WRAP_MODE_BORDER ||
                                   wrap_r == V3D_WRAP_MODE_BORDER);
@@ -807,20 +788,6 @@ v3d_create_sampler_state(struct pipe_context *pctx,
                                                  so->border_color_variants ?
                                                  i : border_variant);
         }
-#else /* V3D_VERSION < 40 */
-        v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) {
-                p0.s_wrap_mode = wrap_s;
-                p0.t_wrap_mode = wrap_t;
-                p0.r_wrap_mode = wrap_r;
-        }
-
-        v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
-                tex.depth_compare_function = cso->compare_mode ?
-                        cso->compare_func :
-                        V3D_COMPARE_FUNC_NEVER;
-                tex.fixed_bias = cso->lod_bias;
-        }
-#endif /* V3D_VERSION < 40 */

         return so;
 }
@@ -911,8 +878,7 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
         tex->image_width = prsc->width0 * msaa_scale;
         tex->image_height = prsc->height0 * msaa_scale;

-#if V3D_VERSION >= 40
-        /* On 4.x, the height of a 1D texture is redefined to be the
+        /* On 4.x, the height of a 1D texture is redefined to be the
          * upper 14 bits of the width (which is only usable with txf).
          */
         if (prsc->target == PIPE_TEXTURE_1D ||
@@ -922,7 +888,6 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
         tex->image_width &= (1 << 14) - 1;
         tex->image_height &= (1 << 14) - 1;
-#endif

         if (prsc->target == PIPE_TEXTURE_3D) {
                 tex->image_depth = prsc->depth0;
@@ -941,7 +906,6 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
         tex->base_level = base_level;

-#if V3D_VERSION >= 40
         tex->max_level = last_level;
         /* Note that we don't have a job to reference the texture's sBO
          * at state create time, so any time this sampler view is used
          * we may need to get the sampler view BO referenced.
          */
         uint32_t base_offset = rsc->bo->offset +
                 v3d_layer_offset(prsc, 0, first_layer);
         tex->texture_base_pointer = cl_address(NULL, base_offset);
-#endif
-
         tex->array_stride_64_byte_aligned = rsc->cube_map_stride / 64;

 #if V3D_VERSION >= 71
@@ -976,12 +938,10 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
         if (tex->level_0_is_strictly_uif)
                 tex->level_0_ub_pad = rsc->slices[0].ub_pad;

-#if V3D_VERSION >= 40
         if (tex->uif_xor_disable ||
             tex->level_0_is_strictly_uif) {
                 tex->extended = true;
         }
-#endif /* V3D_VERSION >= 40 */
 }

 void
@@ -997,16 +957,10 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d,
         assert(so->serial_id != rsc->serial_id);

-#if V3D_VERSION >= 40
         v3d_bo_unreference(&so->bo);
         so->bo = v3d_bo_alloc(v3d->screen,
                               cl_packet_length(TEXTURE_SHADER_STATE), "sampler");
         map = v3d_bo_map(so->bo);
-#else /* V3D_VERSION < 40 */
-        STATIC_ASSERT(sizeof(so->texture_shader_state) >=
-                      cl_packet_length(TEXTURE_SHADER_STATE));
-        map = &so->texture_shader_state;
-#endif

         v3dx_pack(map, TEXTURE_SHADER_STATE, tex) {
                 if (prsc->target != PIPE_BUFFER) {
@@ -1025,69 +979,20 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d,
                 }

                 bool is_srgb = util_format_is_srgb(cso->format);
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                 tex.srgb = is_srgb;
 #endif
 #if V3D_VERSION >= 71
                 tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE;
 #endif
-#if V3D_VERSION >= 40
                 tex.swizzle_r = v3d_translate_pipe_swizzle(so->swizzle[0]);
                 tex.swizzle_g = v3d_translate_pipe_swizzle(so->swizzle[1]);
                 tex.swizzle_b = v3d_translate_pipe_swizzle(so->swizzle[2]);
                 tex.swizzle_a = v3d_translate_pipe_swizzle(so->swizzle[3]);
-#endif
-
-                if (prsc->nr_samples > 1 && V3D_VERSION < 40) {
-                        /* Using texture views to reinterpret formats on our
-                         * MSAA textures won't work, because we don't lay out
-                         * the bits in memory as it's expected -- for example,
-                         * RGBA8 and RGB10_A2 are compatible in the
-                         * ARB_texture_view spec, but in HW we lay them out as
-                         * 32bpp RGBA8 and 64bpp RGBA16F. Just assert for now
-                         * to catch failures.
-                         *
-                         * We explicitly allow remapping S8Z24 to RGBA8888 for
-                         * v3d_blit.c's stencil blits.
-                         */
-                        assert((util_format_linear(cso->format) ==
-                                util_format_linear(prsc->format)) ||
-                               (prsc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM &&
-                                cso->format == PIPE_FORMAT_R8G8B8A8_UNORM));
-                        uint32_t output_image_format =
-                                v3d_get_rt_format(&screen->devinfo, cso->format);
-                        uint32_t internal_type;
-                        uint32_t internal_bpp;
-                        v3dX(get_internal_type_bpp_for_output_format)(output_image_format,
-                                                                      &internal_type,
-                                                                      &internal_bpp);
-
-                        switch (internal_type) {
-                        case V3D_INTERNAL_TYPE_8:
-                                tex.texture_type = TEXTURE_DATA_FORMAT_RGBA8;
-                                break;
-                        case V3D_INTERNAL_TYPE_16F:
-                                tex.texture_type = TEXTURE_DATA_FORMAT_RGBA16F;
-                                break;
-                        default:
-                                unreachable("Bad MSAA texture type");
-                        }
-
-                        /* sRGB was stored in the tile buffer as linear and
-                         * would have been encoded to sRGB on resolved tile
-                         * buffer store. Note that this means we would need
-                         * shader code if we wanted to read an MSAA sRGB
-                         * texture without sRGB decode.
-                         */
-#if V3D_VERSION <= 42
-                        tex.srgb = false;
-#endif
-
-                } else {
-                        tex.texture_type = v3d_get_tex_format(&screen->devinfo,
-                                                              cso->format);
-                }
+                tex.texture_type = v3d_get_tex_format(&screen->devinfo,
+                                                      cso->format);
         };

         so->serial_id = rsc->serial_id;
@@ -1141,7 +1046,6 @@ v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
         if (sample_format == PIPE_FORMAT_S8_UINT_Z24_UNORM)
                 sample_format = PIPE_FORMAT_X8Z24_UNORM;

-#if V3D_VERSION >= 40
         const struct util_format_description *desc =
                 util_format_description(sample_format);
@@ -1202,7 +1106,6 @@ v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                                         V3D_SAMPLER_STATE_F16);
                 }
         }
-#endif

         /* V3D still doesn't support sampling from raster textures, so we will
          * have to copy to a temporary tiled texture.
@@ -1433,7 +1336,6 @@ v3d_create_image_view_texture_shader_state(struct v3d_context *v3d,
                                            struct v3d_shaderimg_stateobj *so,
                                            int img)
 {
-#if V3D_VERSION >= 40
         struct v3d_image_view *iview = &so->si[img];
         void *map;
@@ -1469,12 +1371,6 @@ v3d_create_image_view_texture_shader_state(struct v3d_context *v3d,
                 tex.texture_type = v3d_get_tex_format(&v3d->screen->devinfo,
                                                       iview->base.format);
         };
-#else /* V3D_VERSION < 40 */
-        /* V3D 3.x doesn't use support shader image load/store operations on
-         * textures, so it would get lowered in the shader to general memory
-         * accesses.
-         */
-#endif
 }

 static void
diff --git a/src/gallium/drivers/v3d/v3dx_tfu.c b/src/gallium/drivers/v3d/v3dx_tfu.c
index f4dba0cfa48..3990409e122 100644
--- a/src/gallium/drivers/v3d/v3dx_tfu.c
+++ b/src/gallium/drivers/v3d/v3dx_tfu.c
@@ -118,7 +118,7 @@ v3dX(tfu)(struct pipe_context *pctx,
                 break;
         }

-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
         if (src_base_slice->tiling == V3D_TILING_RASTER) {
                 tfu.icfg |= (V3D33_TFU_ICFG_FORMAT_RASTER <<
                              V3D33_TFU_ICFG_FORMAT_SHIFT);
@@ -152,7 +152,7 @@ v3dX(tfu)(struct pipe_context *pctx,
                               implicit_padded_height) / uif_block_h) <<
                              V3D33_TFU_ICFG_OPAD_SHIFT);
         }
-#endif /* V3D_VERSION <= 42 */
+#endif /* V3D_VERSION == 42 */
 #if V3D_VERSION >= 71
         if (src_base_slice->tiling == V3D_TILING_RASTER) {