broadcom: only support v42 and v71

Acked-by: Emma Anholt <emma@anholt.net>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25851>
2026-01-11 05:50:14 +01:00 · 2023-10-14 13:27:15 +02:00 · 2023-10-14 13:27:15 +02:00 · 85f26828fe
commit 85f26828fe
parent dcc4e1b4d7
47 changed files with 210 additions and 1597 deletions
--- a/src/broadcom/cle/meson.build
+++ b/src/broadcom/cle/meson.build
@ -21,8 +21,6 @@
 # [version, cle XML version]
 v3d_versions = [
  [21, 21],
-  [33, 33],
-  [41, 33],
  [42, 33],
  [71, 33]
 ]
--- a/src/broadcom/cle/v3dx_pack.h
+++ b/src/broadcom/cle/v3dx_pack.h
@ -31,10 +31,6 @@

 #if (V3D_VERSION == 21)
 #  include "cle/v3d_packet_v21_pack.h"
-#elif (V3D_VERSION == 33)
-#  include "cle/v3d_packet_v33_pack.h"
-#elif (V3D_VERSION == 41)
-#  include "cle/v3d_packet_v41_pack.h"
 #elif (V3D_VERSION == 42)
 #  include "cle/v3d_packet_v42_pack.h"
 #elif (V3D_VERSION == 71)
--- a/src/broadcom/clif/clif_dump.c
+++ b/src/broadcom/clif/clif_dump.c
@ -106,12 +106,16 @@ static bool
 clif_dump_packet(struct clif_dump *clif, uint32_t offset, const uint8_t *cl,
                 uint32_t *size, bool reloc_mode)
 {
-        if (clif->devinfo->ver >= 42)
+
+        switch (clif->devinfo->ver) {
+        case 42:
                return v3d42_clif_dump_packet(clif, offset, cl, size, reloc_mode);
-        else if (clif->devinfo->ver >= 41)
-                return v3d41_clif_dump_packet(clif, offset, cl, size, reloc_mode);
-        else
-                return v3d33_clif_dump_packet(clif, offset, cl, size, reloc_mode);
+        case 71:
+                return v3d71_clif_dump_packet(clif, offset, cl, size, reloc_mode);
+        default:
+                break;
+        };
+        unreachable("Unknown HW version");
 }

 static uint32_t
--- a/src/broadcom/clif/clif_private.h
+++ b/src/broadcom/clif/clif_private.h
@ -95,10 +95,6 @@ clif_dump_add_address_to_worklist(struct clif_dump *clif,
                                  enum reloc_worklist_type type,
                                  uint32_t addr);

-bool v3d33_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
-                            const uint8_t *cl, uint32_t *size, bool reloc_mode);
-bool v3d41_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
-                            const uint8_t *cl, uint32_t *size, bool reloc_mode);
 bool v3d42_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
                            const uint8_t *cl, uint32_t *size, bool reloc_mode);
 bool v3d71_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
--- a/src/broadcom/common/v3d_device_info.c
+++ b/src/broadcom/common/v3d_device_info.c
@ -68,8 +68,6 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
    devinfo->has_accumulators = devinfo->ver < 71;

    switch (devinfo->ver) {
-        case 33:
-        case 41:
        case 42:
        case 71:
                break;
--- a/src/broadcom/common/v3d_macros.h
+++ b/src/broadcom/common/v3d_macros.h
@ -32,12 +32,6 @@
 #if (V3D_VERSION == 21)
 #  define V3DX(x) V3D21_##x
 #  define v3dX(x) v3d21_##x
-#elif (V3D_VERSION == 33)
-#  define V3DX(x) V3D33_##x
-#  define v3dX(x) v3d33_##x
-#elif (V3D_VERSION == 41)
-#  define V3DX(x) V3D41_##x
-#  define v3dX(x) v3d41_##x
 #elif (V3D_VERSION == 42)
 #  define V3DX(x) V3D42_##x
 #  define v3dX(x) v3d42_##x
--- a/src/broadcom/common/v3d_performance_counters.h
+++ b/src/broadcom/common/v3d_performance_counters.h
@ -130,7 +130,7 @@ static const char *v3d_performance_counters[][3] = {
   {"QPU", "QPU-stalls-other", "[QPU] Stalled qcycles waiting for any other reason (vary/W/Z)"},
 };

-#elif (V3D_VERSION >= 41)
+#elif (V3D_VERSION >= 42)

 static const char *v3d_performance_counters[][3] = {
   {"FEP", "FEP-valid-primitives-no-rendered-pixels", "[FEP] Valid primitives that result in no rendered pixels, for all rendered tiles"},
--- a/src/broadcom/compiler/meson.build
+++ b/src/broadcom/compiler/meson.build
@ -32,9 +32,7 @@ libbroadcom_compiler_files = files(
  'vir_to_qpu.c',
  'qpu_schedule.c',
  'qpu_validate.c',
-  'v3d33_tex.c',
-  'v3d40_tex.c',
-  'v3d33_vpm_setup.c',
+  'v3d_tex.c',
  'v3d_compiler.h',
  'v3d_nir_lower_io.c',
  'v3d_nir_lower_image_load_store.c',
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@ -38,7 +38,7 @@
 #define __gen_address_type uint32_t
 #define __gen_address_offset(reloc) (*reloc)
 #define __gen_emit_reloc(cl, reloc)
-#include "cle/v3d_packet_v41_pack.h"
+#include "cle/v3d_packet_v42_pack.h"

 #define GENERAL_TMU_LOOKUP_PER_QUAD                 (0 << 7)
 #define GENERAL_TMU_LOOKUP_PER_PIXEL                (1 << 7)
@ -963,10 +963,7 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
                break;
        }

-        if (c->devinfo->ver >= 40)
-                v3d40_vir_emit_tex(c, instr);
-        else
-                v3d33_vir_emit_tex(c, instr);
+        v3d_vir_emit_tex(c, instr);
 }

 static struct qreg
@ -1040,15 +1037,10 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,

        struct qinst *ldvary = NULL;
        struct qreg vary;
-        if (c->devinfo->ver >= 41) {
-                ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef,
-                                      c->undef, c->undef);
-                ldvary->qpu.sig.ldvary = true;
-                vary = vir_emit_def(c, ldvary);
-        } else {
-                vir_NOP(c)->qpu.sig.ldvary = true;
-                vary = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3);
-        }
+        ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef,
+                              c->undef, c->undef);
+        ldvary->qpu.sig.ldvary = true;
+        vary = vir_emit_def(c, ldvary);

        /* Store the input value before interpolation so we can implement
         * GLSL's interpolateAt functions if the shader uses them.
@ -1904,12 +1896,8 @@ emit_frag_end(struct v3d_compile *c)
                        inst = vir_MOV_dest(c, tlbu_reg,
                                            c->outputs[c->output_position_index]);

-                        if (c->devinfo->ver >= 42) {
-                                tlb_specifier |= (TLB_V42_DEPTH_TYPE_PER_PIXEL |
-                                                  TLB_SAMPLE_MODE_PER_PIXEL);
-                        } else {
-                                tlb_specifier |= TLB_DEPTH_TYPE_PER_PIXEL;
-                        }
+                        tlb_specifier |= (TLB_V42_DEPTH_TYPE_PER_PIXEL |
+                                          TLB_SAMPLE_MODE_PER_PIXEL);
                } else {
                        /* Shader doesn't write to gl_FragDepth, take Z from
                         * FEP.
@ -1917,16 +1905,11 @@ emit_frag_end(struct v3d_compile *c)
                        c->writes_z_from_fep = true;
                        inst = vir_MOV_dest(c, tlbu_reg, vir_nop_reg());

-                        if (c->devinfo->ver >= 42) {
-                                /* The spec says the PER_PIXEL flag is ignored
-                                 * for invariant writes, but the simulator
-                                 * demands it.
-                                 */
-                                tlb_specifier |= (TLB_V42_DEPTH_TYPE_INVARIANT |
-                                                  TLB_SAMPLE_MODE_PER_PIXEL);
-                        } else {
-                                tlb_specifier |= TLB_DEPTH_TYPE_INVARIANT;
-                        }
+                        /* The spec says the PER_PIXEL flag is ignored for
+                         * invariant writes, but the simulator demands it.
+                         */
+                        tlb_specifier |= (TLB_V42_DEPTH_TYPE_INVARIANT |
+                                          TLB_SAMPLE_MODE_PER_PIXEL);

                        /* Since (single-threaded) fragment shaders always need
                         * a TLB write, if we dond't have any we emit a
@ -1956,7 +1939,6 @@ vir_VPM_WRITE_indirect(struct v3d_compile *c,
                       struct qreg vpm_index,
                       bool uniform_vpm_index)
 {
-        assert(c->devinfo->ver >= 40);
        if (uniform_vpm_index)
                vir_STVPMV(c, vpm_index, val);
        else
@ -1966,13 +1948,8 @@ vir_VPM_WRITE_indirect(struct v3d_compile *c,
 static void
 vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t vpm_index)
 {
-        if (c->devinfo->ver >= 40) {
-                vir_VPM_WRITE_indirect(c, val,
-                                       vir_uniform_ui(c, vpm_index), true);
-        } else {
-                /* XXX: v3d33_vir_vpm_write_setup(c); */
-                vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
-        }
+        vir_VPM_WRITE_indirect(c, val,
+                               vir_uniform_ui(c, vpm_index), true);
 }

 static void
@ -1980,7 +1957,7 @@ emit_vert_end(struct v3d_compile *c)
 {
        /* GFXH-1684: VPM writes need to be complete by the end of the shader.
         */
-        if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42)
+        if (c->devinfo->ver == 42)
                vir_VPMWT(c);
 }

@ -1989,7 +1966,7 @@ emit_geom_end(struct v3d_compile *c)
 {
        /* GFXH-1684: VPM writes need to be complete by the end of the shader.
         */
-        if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42)
+        if (c->devinfo->ver == 42)
                vir_VPMWT(c);
 }

@ -2174,26 +2151,9 @@ ntq_emit_vpm_read(struct v3d_compile *c,
                  uint32_t *remaining,
                  uint32_t vpm_index)
 {
-        if (c->devinfo->ver >= 40 ) {
-                return vir_LDVPMV_IN(c,
-                                     vir_uniform_ui(c,
-                                                    (*num_components_queued)++));
-        }
-
-        struct qreg vpm = vir_reg(QFILE_VPM, vpm_index);
-        if (*num_components_queued != 0) {
-                (*num_components_queued)--;
-                return vir_MOV(c, vpm);
-        }
-
-        uint32_t num_components = MIN2(*remaining, 32);
-
-        v3d33_vir_vpm_read_setup(c, num_components);
-
-        *num_components_queued = num_components - 1;
-        *remaining -= num_components;
-
-        return vir_MOV(c, vpm);
+        return vir_LDVPMV_IN(c,
+                             vir_uniform_ui(c,
+                                            (*num_components_queued)++));
 }

 static void
@ -2263,31 +2223,8 @@ ntq_setup_vs_inputs(struct v3d_compile *c)
        }

        /* The actual loads will happen directly in nir_intrinsic_load_input
-         * on newer versions.
         */
-        if (c->devinfo->ver >= 40)
-                return;
-
-        for (int loc = 0; loc < ARRAY_SIZE(c->vattr_sizes); loc++) {
-                resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
-                                  (loc + 1) * 4);
-
-                for (int i = 0; i < c->vattr_sizes[loc]; i++) {
-                        c->inputs[loc * 4 + i] =
-                                ntq_emit_vpm_read(c,
-                                                  &vpm_components_queued,
-                                                  &num_components,
-                                                  loc * 4 + i);
-
-                }
-        }
-
-        if (c->devinfo->ver >= 40) {
-                assert(vpm_components_queued == num_components);
-        } else {
-                assert(vpm_components_queued == 0);
-                assert(num_components == 0);
-        }
+        return;
 }

 static bool
@ -2533,10 +2470,8 @@ vir_emit_tlb_color_read(struct v3d_compile *c, nir_intrinsic_instr *instr)
         * switch instead -- see vir_emit_thrsw().
         */
        if (!c->emitted_tlb_load) {
-                if (!c->last_thrsw_at_top_level) {
-                        assert(c->devinfo->ver >= 41);
+                if (!c->last_thrsw_at_top_level)
                        vir_emit_thrsw(c);
-                }

                c->emitted_tlb_load = true;
        }
@ -2744,7 +2679,7 @@ ntq_emit_load_input(struct v3d_compile *c, nir_intrinsic_instr *instr)
        unsigned offset =
                nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[0]);

-        if (c->s->info.stage != MESA_SHADER_FRAGMENT && c->devinfo->ver >= 40) {
+        if (c->s->info.stage != MESA_SHADER_FRAGMENT) {
               /* Emit the LDVPM directly now, rather than at the top
                * of the shader like we did for V3D 3.x (which needs
                * vpmsetup when not just taking the next offset).
@ -3328,11 +3263,11 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
        case nir_intrinsic_image_store:
        case nir_intrinsic_image_atomic:
        case nir_intrinsic_image_atomic_swap:
-                v3d40_vir_emit_image_load_store(c, instr);
+                v3d_vir_emit_image_load_store(c, instr);
                break;

        case nir_intrinsic_image_load:
-                v3d40_vir_emit_image_load_store(c, instr);
+                v3d_vir_emit_image_load_store(c, instr);
                /* Not really a general TMU load, but we only use this flag
                 * for NIR scheduling and we do schedule these under the same
                 * policy as general TMU.
@ -3502,21 +3437,8 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                         * (actually supergroup) to block until the last
                         * invocation reaches the TSY op.
                         */
-                        if (c->devinfo->ver >= 42) {
-                                vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC,
-                                                              V3D_QPU_WADDR_SYNCB));
-                        } else {
-                                struct qinst *sync =
-                                        vir_BARRIERID_dest(c,
-                                                           vir_reg(QFILE_MAGIC,
-                                                                   V3D_QPU_WADDR_SYNCU));
-                                sync->uniform =
-                                        vir_get_uniform_index(c, QUNIFORM_CONSTANT,
-                                                              0xffffff00 |
-                                                              V3D_TSY_WAIT_INC_CHECK);
-
-                        }
-
+                        vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC,
+                                                      V3D_QPU_WADDR_SYNCB));
                        /* The blocking of a TSY op only happens at the next
                         * thread switch. No texturing may be outstanding at the
                         * time of a TSY blocking operation.
@ -4330,14 +4252,12 @@ nir_to_vir(struct v3d_compile *c)
                               emit_fragment_varying(c, NULL, -1, 0, 0);
                }

-                if (c->fs_key->is_points &&
-                    (c->devinfo->ver < 40 || program_reads_point_coord(c))) {
+                if (c->fs_key->is_points && program_reads_point_coord(c)) {
                        c->point_x = emit_fragment_varying(c, NULL, -1, 0, 0);
                        c->point_y = emit_fragment_varying(c, NULL, -1, 0, 0);
                        c->uses_implicit_point_line_varyings = true;
                } else if (c->fs_key->is_lines &&
-                           (c->devinfo->ver < 40 ||
-                            BITSET_TEST(c->s->info.system_values_read,
+                           (BITSET_TEST(c->s->info.system_values_read,
                                        SYSTEM_VALUE_LINE_COORD))) {
                        c->line_x = emit_fragment_varying(c, NULL, -1, 0, 0);
                        c->uses_implicit_point_line_varyings = true;
@ -4350,7 +4270,7 @@ nir_to_vir(struct v3d_compile *c)
                                                      V3D_QPU_WADDR_SYNC));
                }

-                if (c->devinfo->ver <= 42) {
+                if (c->devinfo->ver == 42) {
                        c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
                        c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
                } else if (c->devinfo->ver >= 71) {
@ -4461,25 +4381,12 @@ vir_emit_last_thrsw(struct v3d_compile *c,
 {
        *restore_last_thrsw = c->last_thrsw;

-        /* On V3D before 4.1, we need a TMU op to be outstanding when thread
-         * switching, so disable threads if we didn't do any TMU ops (each of
-         * which would have emitted a THRSW).
-         */
-        if (!c->last_thrsw_at_top_level && c->devinfo->ver < 41) {
-                c->threads = 1;
-                if (c->last_thrsw)
-                        vir_remove_thrsw(c);
-                *restore_last_thrsw = NULL;
-        }
-
        /* If we're threaded and the last THRSW was in conditional code, then
         * we need to emit another one so that we can flag it as the last
         * thrsw.
         */
-        if (c->last_thrsw && !c->last_thrsw_at_top_level) {
-                assert(c->devinfo->ver >= 41);
+        if (c->last_thrsw && !c->last_thrsw_at_top_level)
                vir_emit_thrsw(c);
-        }

        /* If we're threaded, then we need to mark the last THRSW instruction
         * so we can emit a pair of them at QPU emit time.
@ -4487,10 +4394,8 @@ vir_emit_last_thrsw(struct v3d_compile *c,
         * For V3D 4.x, we can spawn the non-fragment shaders already in the
         * post-last-THRSW state, so we can skip this.
         */
-        if (!c->last_thrsw && c->s->info.stage == MESA_SHADER_FRAGMENT) {
-                assert(c->devinfo->ver >= 41);
+        if (!c->last_thrsw && c->s->info.stage == MESA_SHADER_FRAGMENT)
                vir_emit_thrsw(c);
-        }

        /* If we have not inserted a last thread switch yet, do it now to ensure
         * any potential spilling we do happens before this. If we don't spill
@ -4616,7 +4521,7 @@ v3d_nir_to_vir(struct v3d_compile *c)
        /* Attempt to allocate registers for the temporaries.  If we fail,
         * reduce thread count and try again.
         */
-        int min_threads = (c->devinfo->ver >= 41) ? 2 : 1;
+        int min_threads = 2;
        struct qpu_reg *temp_registers;
        while (true) {
                temp_registers = v3d_register_allocate(c);
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@ -202,9 +202,6 @@ tmu_write_is_sequence_terminator(uint32_t waddr)
 static bool
 can_reorder_tmu_write(const struct v3d_device_info *devinfo, uint32_t waddr)
 {
-        if (devinfo->ver < 40)
-                return false;
-
        if (tmu_write_is_sequence_terminator(waddr))
                return false;

@ -267,8 +264,7 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
                        break;

                case V3D_QPU_WADDR_UNIFA:
-                        if (state->devinfo->ver >= 40)
-                                add_write_dep(state, &state->last_unifa, n);
+                        add_write_dep(state, &state->last_unifa, n);
                        break;

                case V3D_QPU_WADDR_NOP:
@ -660,7 +656,7 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo,
            v3d_qpu_writes_r4(devinfo, inst))
                return true;

-        if (devinfo->ver <= 42)
+        if (devinfo->ver == 42)
           return false;

        /* Don't schedule anything that writes rf0 right after ldvary, since
@ -854,13 +850,10 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
        if (util_bitcount(a_peripherals) + util_bitcount(b_peripherals) <= 1)
                return true;

-        if (devinfo->ver < 41)
-                return false;
-
        /* V3D 4.x can't do more than one peripheral access except in a
         * few cases:
         */
-        if (devinfo->ver <= 42) {
+        if (devinfo->ver == 42) {
                /* WRTMUC signal with TMU register write (other than tmuc). */
                if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
                    b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
@ -984,7 +977,7 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
                        result->sig.small_imm_d) <= 1;
        }

-        assert(devinfo->ver <= 42);
+        assert(devinfo->ver == 42);

        uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr);
        int naddrs = util_bitcount64(raddrs_used);
@ -1499,7 +1492,7 @@ retry:
                         * as long as it is not the last delay slot.
                         */
                        if (inst->sig.ldvary) {
-                                if (c->devinfo->ver <= 42 &&
+                                if (c->devinfo->ver == 42 &&
                                    scoreboard->last_thrsw_tick + 2 >=
                                    scoreboard->tick - 1) {
                                        continue;
@ -1607,7 +1600,7 @@ update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard,
 {
        if (v3d_qpu_magic_waddr_is_sfu(waddr))
                scoreboard->last_magic_sfu_write_tick = scoreboard->tick;
-        else if (devinfo->ver >= 40 && waddr == V3D_QPU_WADDR_UNIFA)
+        else if (waddr == V3D_QPU_WADDR_UNIFA)
                scoreboard->last_unifa_write_tick = scoreboard->tick;
 }

@ -1938,7 +1931,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
        if (slot > 0 && qinst->uniform != ~0)
                return false;

-        if (c->devinfo->ver <= 42 && v3d_qpu_waits_vpm(inst))
+        if (c->devinfo->ver == 42 && v3d_qpu_waits_vpm(inst))
                return false;

        if (inst->sig.ldvary)
@ -1946,12 +1939,12 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,

        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                /* GFXH-1625: TMUWT not allowed in the final instruction. */
-                if (c->devinfo->ver <= 42 && slot == 2 &&
+                if (c->devinfo->ver == 42 && slot == 2 &&
                    inst->alu.add.op == V3D_QPU_A_TMUWT) {
                        return false;
                }

-                if (c->devinfo->ver <= 42) {
+                if (c->devinfo->ver == 42) {
                        /* No writing physical registers at the end. */
                        bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP;
                        bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP;
@ -1977,10 +1970,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
                        }
                }

-                if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF)
-                        return false;
-
-                if (c->devinfo->ver <= 42) {
+                if (c->devinfo->ver == 42) {
                        /* RF0-2 might be overwritten during the delay slots by
                         * fragment shader setup.
                         */
@ -2034,7 +2024,7 @@ qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c,
                return false;

        if (qinst->qpu.sig.ldvary) {
-                if (c->devinfo->ver <= 42 && slot > 0)
+                if (c->devinfo->ver == 42 && slot > 0)
                        return false;
                if (c->devinfo->ver >= 71 && slot == 2)
                        return false;
@ -2475,7 +2465,7 @@ alu_reads_register(const struct v3d_device_info *devinfo,
        else
                num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);

-        if (devinfo->ver <= 42) {
+        if (devinfo->ver == 42) {
                enum v3d_qpu_mux mux_a, mux_b;
                if (add) {
                        mux_a = inst->alu.add.a.mux;
@ -2639,7 +2629,7 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
         * and flagging it for a fixup. In V3D 7.x this is limited only to the
         * second delay slot.
         */
-        assert((devinfo->ver <= 42 &&
+        assert((devinfo->ver == 42 &&
                scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1) ||
               (devinfo->ver >= 71 &&
                scoreboard->last_thrsw_tick + 2 != scoreboard->tick - 1));
@ -2672,7 +2662,7 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
         * ldvary write to r5/rf0 happens in the next instruction).
         */
        assert(!v3d_qpu_writes_r5(devinfo, inst));
-        assert(devinfo->ver <= 42 ||
+        assert(devinfo->ver == 42 ||
               (!v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
                !v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0)));

--- a/src/broadcom/compiler/qpu_validate.c
+++ b/src/broadcom/compiler/qpu_validate.c
@ -243,7 +243,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
                }

                if (inst->sig.ldvary) {
-                        if (devinfo->ver <= 42)
+                        if (devinfo->ver == 42)
                                fail_instr(state, "LDVARY during THRSW delay slots");
                        if (devinfo->ver >= 71 &&
                            state->ip - state->last_thrsw_ip == 2) {
@ -276,7 +276,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
            vpm_writes +
            tlb_writes +
            tsy_writes +
-            (devinfo->ver <= 42 ? inst->sig.ldtmu : 0) +
+            (devinfo->ver == 42 ? inst->sig.ldtmu : 0) +
            inst->sig.ldtlb +
            inst->sig.ldvpm +
            inst->sig.ldtlbu > 1) {
@ -316,7 +316,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
            inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if ((inst->alu.add.op != V3D_QPU_A_NOP &&
                     !inst->alu.add.magic_write)) {
-                        if (devinfo->ver <= 42) {
+                        if (devinfo->ver == 42) {
                                fail_instr(state, "RF write after THREND");
                        } else if (devinfo->ver >= 71) {
                                if (state->last_thrsw_ip - state->ip == 0) {
@ -333,7 +333,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)

                if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
                     !inst->alu.mul.magic_write)) {
-                        if (devinfo->ver <= 42) {
+                        if (devinfo->ver == 42) {
                                fail_instr(state, "RF write after THREND");
                        } else if (devinfo->ver >= 71) {
                                if (state->last_thrsw_ip - state->ip == 0) {
@ -351,7 +351,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)

                if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
                    !inst->sig_magic) {
-                        if (devinfo->ver <= 42) {
+                        if (devinfo->ver == 42) {
                                fail_instr(state, "RF write after THREND");
                        } else if (devinfo->ver >= 71 &&
                                   (inst->sig_addr == 2 ||
--- a/src/broadcom/compiler/v3d33_tex.c
+++ b/src/broadcom/compiler/v3d33_tex.c
@ -1,193 +0,0 @@
-/*
- * Copyright © 2016-2018 Broadcom
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "v3d_compiler.h"
-
-/* We don't do any address packing. */
-#define __gen_user_data void
-#define __gen_address_type uint32_t
-#define __gen_address_offset(reloc) (*reloc)
-#define __gen_emit_reloc(cl, reloc)
-#include "cle/v3d_packet_v33_pack.h"
-
-void
-v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
-{
-        /* FIXME: We don't bother implementing pipelining for texture reads
-         * for any pre 4.x hardware. It should be straight forward to do but
-         * we are not really testing or even targeting this hardware at
-         * present.
-         */
-        ntq_flush_tmu(c);
-
-        unsigned unit = instr->texture_index;
-
-        struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = {
-                V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_header,
-
-                .fetch_sample_mode = instr->op == nir_texop_txf,
-        };
-
-        struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 p1_unpacked = {
-        };
-
-        switch (instr->sampler_dim) {
-        case GLSL_SAMPLER_DIM_1D:
-                if (instr->is_array)
-                        p0_unpacked.lookup_type = TEXTURE_1D_ARRAY;
-                else
-                        p0_unpacked.lookup_type = TEXTURE_1D;
-                break;
-        case GLSL_SAMPLER_DIM_2D:
-        case GLSL_SAMPLER_DIM_RECT:
-                if (instr->is_array)
-                        p0_unpacked.lookup_type = TEXTURE_2D_ARRAY;
-                else
-                        p0_unpacked.lookup_type = TEXTURE_2D;
-                break;
-        case GLSL_SAMPLER_DIM_3D:
-                p0_unpacked.lookup_type = TEXTURE_3D;
-                break;
-        case GLSL_SAMPLER_DIM_CUBE:
-                p0_unpacked.lookup_type = TEXTURE_CUBE_MAP;
-                break;
-        default:
-                unreachable("Bad sampler type");
-        }
-
-        struct qreg coords[5];
-        int next_coord = 0;
-        for (unsigned i = 0; i < instr->num_srcs; i++) {
-                switch (instr->src[i].src_type) {
-                case nir_tex_src_coord:
-                        for (int j = 0; j < instr->coord_components; j++) {
-                                coords[next_coord++] =
-                                        ntq_get_src(c, instr->src[i].src, j);
-                        }
-                        if (instr->coord_components < 2)
-                                coords[next_coord++] = vir_uniform_f(c, 0.5);
-                        break;
-                case nir_tex_src_bias:
-                        coords[next_coord++] =
-                                ntq_get_src(c, instr->src[i].src, 0);
-
-                        p0_unpacked.bias_supplied = true;
-                        break;
-                case nir_tex_src_lod:
-                        coords[next_coord++] =
-                                vir_FADD(c,
-                                         ntq_get_src(c, instr->src[i].src, 0),
-                                         vir_uniform(c, QUNIFORM_TEXTURE_FIRST_LEVEL,
-                                                     unit));
-
-                        if (instr->op != nir_texop_txf &&
-                            instr->op != nir_texop_tg4) {
-                                p0_unpacked.disable_autolod_use_bias_only = true;
-                        }
-                        break;
-                case nir_tex_src_comparator:
-                        coords[next_coord++] =
-                                ntq_get_src(c, instr->src[i].src, 0);
-
-                        p0_unpacked.shadow = true;
-                        break;
-
-                case nir_tex_src_offset: {
-                        p0_unpacked.texel_offset_for_s_coordinate =
-                                nir_src_comp_as_int(instr->src[i].src, 0);
-
-                        if (instr->coord_components >= 2)
-                                p0_unpacked.texel_offset_for_t_coordinate =
-                                        nir_src_comp_as_int(instr->src[i].src, 1);
-
-                        if (instr->coord_components >= 3)
-                                p0_unpacked.texel_offset_for_r_coordinate =
-                                        nir_src_comp_as_int(instr->src[i].src, 2);
-                        break;
-                }
-
-                default:
-                        unreachable("unknown texture source");
-                }
-        }
-
-        /* Limit the number of channels returned to both how many the NIR
-         * instruction writes and how many the instruction could produce.
-         */
-        p1_unpacked.return_words_of_texture_data =
-                nir_def_components_read(&instr->def);
-
-        uint32_t p0_packed;
-        V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL,
-                                                         (uint8_t *)&p0_packed,
-                                                         &p0_unpacked);
-
-        uint32_t p1_packed;
-        V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL,
-                                                         (uint8_t *)&p1_packed,
-                                                         &p1_unpacked);
-        /* Load unit number into the address field, which will be be used by
-         * the driver to decide which texture to put in the actual address
-         * field.
-         */
-        p1_packed |= unit << 5;
-
-        /* There is no native support for GL texture rectangle coordinates, so
-         * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0,
-         * 1]).
-         */
-        if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
-                coords[0] = vir_FMUL(c, coords[0],
-                                     vir_uniform(c, QUNIFORM_TEXRECT_SCALE_X,
-                                                 unit));
-                coords[1] = vir_FMUL(c, coords[1],
-                                     vir_uniform(c, QUNIFORM_TEXRECT_SCALE_Y,
-                                                 unit));
-        }
-
-        int texture_u[] = {
-                vir_get_uniform_index(c, QUNIFORM_TEXTURE_CONFIG_P0_0 + unit, p0_packed),
-                vir_get_uniform_index(c, QUNIFORM_TEXTURE_CONFIG_P1, p1_packed),
-        };
-
-        for (int i = 0; i < next_coord; i++) {
-                struct qreg dst;
-
-                if (i == next_coord - 1)
-                        dst = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUL);
-                else
-                        dst = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMU);
-
-                struct qinst *tmu = vir_MOV_dest(c, dst, coords[i]);
-
-                if (i < 2)
-                        tmu->uniform = texture_u[i];
-        }
-
-        vir_emit_thrsw(c);
-
-        for (int i = 0; i < 4; i++) {
-                if (p1_unpacked.return_words_of_texture_data & (1 << i))
-                        ntq_store_def(c, &instr->def, i, vir_LDTMU(c));
-        }
-}
--- a/src/broadcom/compiler/v3d33_vpm_setup.c
+++ b/src/broadcom/compiler/v3d33_vpm_setup.c
@ -1,75 +0,0 @@
-/*
- * Copyright © 2016-2018 Broadcom
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "v3d_compiler.h"
-
-/* We don't do any address packing. */
-#define __gen_user_data void
-#define __gen_address_type uint32_t
-#define __gen_address_offset(reloc) (*reloc)
-#define __gen_emit_reloc(cl, reloc)
-#include "broadcom/cle/v3d_packet_v33_pack.h"
-
-void
-v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components)
-{
-        struct V3D33_VPM_GENERIC_BLOCK_READ_SETUP unpacked = {
-                V3D33_VPM_GENERIC_BLOCK_READ_SETUP_header,
-
-                .horiz = true,
-                .laned = false,
-                /* If the field is 0, that means a read count of 32. */
-                .num = num_components & 31,
-                .segs = true,
-                .stride = 1,
-                .size = VPM_SETUP_SIZE_32_BIT,
-                .addr = c->num_inputs,
-        };
-
-        uint32_t packed;
-        V3D33_VPM_GENERIC_BLOCK_READ_SETUP_pack(NULL,
-                                                (uint8_t *)&packed,
-                                                &unpacked);
-        vir_VPMSETUP(c, vir_uniform_ui(c, packed));
-}
-
-void
-v3d33_vir_vpm_write_setup(struct v3d_compile *c)
-{
-        uint32_t packed;
-        struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP unpacked = {
-                V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_header,
-
-                .horiz = true,
-                .laned = false,
-                .segs = true,
-                .stride = 1,
-                .size = VPM_SETUP_SIZE_32_BIT,
-                .addr = 0,
-        };
-
-        V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_pack(NULL,
-                                                (uint8_t *)&packed,
-                                                &unpacked);
-        vir_VPMSETUP(c, vir_uniform_ui(c, packed));
-}
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@ -96,14 +96,6 @@ enum qfile {
         */
        QFILE_TEMP,

-        /**
-         * VPM reads use this with an index value to say what part of the VPM
-         * is being read.
-         *
-         * Used only for ver < 40. For ver >= 40 we use ldvpm.
-         */
-        QFILE_VPM,
-
        /**
         * Stores an immediate value in the index field that will be used
         * directly by qpu_load_imm().
@ -1150,7 +1142,6 @@ bool vir_is_raw_mov(struct qinst *inst);
 bool vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst);
 bool vir_is_add(struct qinst *inst);
 bool vir_is_mul(struct qinst *inst);
-bool vir_writes_r3_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst);
 bool vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst);
 struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
 uint8_t vir_channels_written(struct qinst *inst);
@ -1187,12 +1178,9 @@ bool v3d_nir_lower_txf_ms(nir_shader *s);
 bool v3d_nir_lower_image_load_store(nir_shader *s);
 bool v3d_nir_lower_load_store_bitsize(nir_shader *s);

-void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components);
-void v3d33_vir_vpm_write_setup(struct v3d_compile *c);
-void v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);
-void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);
-void v3d40_vir_emit_image_load_store(struct v3d_compile *c,
-                                     nir_intrinsic_instr *instr);
+void v3d_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);
+void v3d_vir_emit_image_load_store(struct v3d_compile *c,
+                                   nir_intrinsic_instr *instr);

 void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers);
 uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c);
@ -1302,28 +1290,18 @@ vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b)         \
 #define VIR_SFU(name)                                                      \
 static inline struct qreg                                                \
 vir_##name(struct v3d_compile *c, struct qreg a)                         \
-{                                                                        \
-        if (c->devinfo->ver >= 41) {                                     \
-                return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name,    \
-                                                    c->undef,            \
-                                                    a, c->undef));       \
-        } else {                                                         \
-                vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
-                return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
-        }                                                                \
+{                                                                       \
+        return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name,           \
+                                            c->undef,                   \
+                                            a, c->undef));              \
 }                                                                        \
 static inline struct qinst *                                             \
 vir_##name##_dest(struct v3d_compile *c, struct qreg dest,               \
                  struct qreg a)                                         \
 {                                                                        \
-        if (c->devinfo->ver >= 41) {                                     \
-                return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \
-                                                       dest,             \
-                                                       a, c->undef));    \
-        } else {                                                         \
-                vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
-                return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
-        }                                                                \
+        return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name,        \
+                                               dest,                    \
+                                               a, c->undef));           \
 }

 #define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name)
@ -1454,16 +1432,11 @@ vir_NOP(struct v3d_compile *c)
 static inline struct qreg
 vir_LDTMU(struct v3d_compile *c)
 {
-        if (c->devinfo->ver >= 41) {
-                struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef,
-                                                   c->undef, c->undef);
-                ldtmu->qpu.sig.ldtmu = true;
+        struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef,
+                                           c->undef, c->undef);
+        ldtmu->qpu.sig.ldtmu = true;

-                return vir_emit_def(c, ldtmu);
-        } else {
-                vir_NOP(c)->qpu.sig.ldtmu = true;
-                return vir_MOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4));
-        }
+        return vir_emit_def(c, ldtmu);
 }

 static inline struct qreg
@ -1476,7 +1449,6 @@ vir_UMUL(struct v3d_compile *c, struct qreg src0, struct qreg src1)
 static inline struct qreg
 vir_TLBU_COLOR_READ(struct v3d_compile *c, uint32_t config)
 {
-        assert(c->devinfo->ver >= 41); /* XXX */
        assert((config & 0xffffff00) == 0xffffff00);

        struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef,
@ -1489,8 +1461,6 @@ vir_TLBU_COLOR_READ(struct v3d_compile *c, uint32_t config)
 static inline struct qreg
 vir_TLB_COLOR_READ(struct v3d_compile *c)
 {
-        assert(c->devinfo->ver >= 41); /* XXX */
-
        struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef,
                                           c->undef, c->undef);
        ldtlb->qpu.sig.ldtlb = true;
--- a/src/broadcom/compiler/v3d_nir_lower_io.c
+++ b/src/broadcom/compiler/v3d_nir_lower_io.c
@ -515,7 +515,7 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                         * The correct fix for this as recommended by Broadcom
                         * is to convert to .8 fixed-point with ffloor().
                         */
-                        if (c->devinfo->ver <= 42)
+                        if (c->devinfo->ver == 42)
                                 pos = nir_f2i32(b, nir_ffloor(b, pos));
                        else
                                 pos = nir_f2i32(b, nir_fround_even(b, pos));
--- a/src/broadcom/compiler/v3d40_tex.c
+++ b/src/broadcom/compiler/v3d40_tex.c
@ -28,7 +28,7 @@
 #define __gen_address_type uint32_t
 #define __gen_address_offset(reloc) (*reloc)
 #define __gen_emit_reloc(cl, reloc)
-#include "cle/v3d_packet_v41_pack.h"
+#include "cle/v3d_packet_v42_pack.h"

 static inline struct qinst *
 vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val)
@ -61,11 +61,11 @@ vir_WRTMUC(struct v3d_compile *c, enum quniform_contents contents, uint32_t data
        inst->uniform = vir_get_uniform_index(c, contents, data);
 }

-static const struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked_default = {
+static const struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked_default = {
        .per_pixel_mask_enable = true,
 };

-static const struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = {
+static const struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = {
        .op = V3D_TMU_OP_REGULAR,
 };

@ -86,7 +86,7 @@ handle_tex_src(struct v3d_compile *c,
               nir_tex_instr *instr,
               unsigned src_idx,
               unsigned non_array_components,
-               struct V3D41_TMU_CONFIG_PARAMETER_2 *p2_unpacked,
+               struct V3D42_TMU_CONFIG_PARAMETER_2 *p2_unpacked,
               struct qreg *s_out,
               unsigned *tmu_writes)
 {
@ -201,7 +201,7 @@ handle_tex_src(struct v3d_compile *c,
 static void
 vir_tex_handle_srcs(struct v3d_compile *c,
                    nir_tex_instr *instr,
-                    struct V3D41_TMU_CONFIG_PARAMETER_2 *p2_unpacked,
+                    struct V3D42_TMU_CONFIG_PARAMETER_2 *p2_unpacked,
                    struct qreg *s,
                    unsigned *tmu_writes)
 {
@ -224,10 +224,8 @@ get_required_tex_tmu_writes(struct v3d_compile *c, nir_tex_instr *instr)
 }

 void
-v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
+v3d_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
 {
-        assert(instr->op != nir_texop_lod || c->devinfo->ver >= 42);
-
        unsigned texture_idx = instr->texture_index;

        /* For instructions that don't have a sampler (i.e. txf) we bind
@ -244,7 +242,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
        bool output_type_32_bit =
                c->key->sampler[sampler_idx].return_size == 32;

-        struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
+        struct V3D42_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
        };

        /* Limit the number of channels returned to both how many the NIR
@ -275,7 +273,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
        }
        assert(p0_unpacked.return_words_of_texture_data != 0);

-        struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = {
+        struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked = {
                .op = V3D_TMU_OP_REGULAR,
                .gather_mode = instr->op == nir_texop_tg4,
                .gather_component = instr->component,
@ -304,12 +302,12 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
        vir_tex_handle_srcs(c, instr, &p2_unpacked, &s, NULL);

        uint32_t p0_packed;
-        V3D41_TMU_CONFIG_PARAMETER_0_pack(NULL,
+        V3D42_TMU_CONFIG_PARAMETER_0_pack(NULL,
                                          (uint8_t *)&p0_packed,
                                          &p0_unpacked);

        uint32_t p2_packed;
-        V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL,
+        V3D42_TMU_CONFIG_PARAMETER_2_pack(NULL,
                                          (uint8_t *)&p2_packed,
                                          &p2_unpacked);

@ -339,7 +337,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
                output_type_32_bit;

        if (non_default_p1_config) {
-                struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
+                struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
                        .output_type_32_bit = output_type_32_bit,

                        .unnormalized_coordinates = (instr->sampler_dim ==
@ -356,7 +354,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
                       p0_unpacked.return_words_of_texture_data < (1 << 2));

                uint32_t p1_packed;
-                V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL,
+                V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL,
                                                  (uint8_t *)&p1_packed,
                                                  &p1_unpacked);

@ -384,7 +382,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
                 * address
                 */
                uint32_t p1_packed_default;
-                V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL,
+                V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL,
                                                  (uint8_t *)&p1_packed_default,
                                                  &p1_unpacked_default);
                vir_WRTMUC(c, QUNIFORM_CONSTANT, p1_packed_default);
@ -412,7 +410,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
 }

 static uint32_t
-v3d40_image_atomic_tmu_op(nir_intrinsic_instr *instr)
+v3d_image_atomic_tmu_op(nir_intrinsic_instr *instr)
 {
        nir_atomic_op atomic_op = nir_intrinsic_atomic_op(instr);
        switch (atomic_op) {
@ -431,7 +429,7 @@ v3d40_image_atomic_tmu_op(nir_intrinsic_instr *instr)
 }

 static uint32_t
-v3d40_image_load_store_tmu_op(nir_intrinsic_instr *instr)
+v3d_image_load_store_tmu_op(nir_intrinsic_instr *instr)
 {
        switch (instr->intrinsic) {
        case nir_intrinsic_image_load:
@ -440,7 +438,7 @@ v3d40_image_load_store_tmu_op(nir_intrinsic_instr *instr)

        case nir_intrinsic_image_atomic:
        case nir_intrinsic_image_atomic_swap:
-                return v3d40_image_atomic_tmu_op(instr);
+                return v3d_image_atomic_tmu_op(instr);

        default:
                unreachable("unknown image intrinsic");
@ -552,21 +550,21 @@ get_required_image_tmu_writes(struct v3d_compile *c,
 }

 void
-v3d40_vir_emit_image_load_store(struct v3d_compile *c,
-                                nir_intrinsic_instr *instr)
+v3d_vir_emit_image_load_store(struct v3d_compile *c,
+                              nir_intrinsic_instr *instr)
 {
        unsigned format = nir_intrinsic_format(instr);
        unsigned unit = nir_src_as_uint(instr->src[0]);

-        struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
+        struct V3D42_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
        };

-        struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
+        struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
                .per_pixel_mask_enable = true,
                .output_type_32_bit = v3d_gl_format_is_return_32(format),
        };

-        struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = { 0 };
+        struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked = { 0 };

        /* Limit the number of channels returned to both how many the NIR
         * instruction writes and how many the instruction could produce.
@ -578,7 +576,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
        p0_unpacked.return_words_of_texture_data =
                (1 << instr_return_channels) - 1;

-        p2_unpacked.op = v3d40_image_load_store_tmu_op(instr);
+        p2_unpacked.op = v3d_image_load_store_tmu_op(instr);

        /* If we were able to replace atomic_add for an inc/dec, then we
         * need/can to do things slightly different, like not loading the
@ -591,7 +589,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
                 p2_unpacked.op == V3D_TMU_OP_WRITE_OR_READ_DEC);

        uint32_t p0_packed;
-        V3D41_TMU_CONFIG_PARAMETER_0_pack(NULL,
+        V3D42_TMU_CONFIG_PARAMETER_0_pack(NULL,
                                          (uint8_t *)&p0_packed,
                                          &p0_unpacked);

@ -602,12 +600,12 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
        p0_packed |= unit << 24;

        uint32_t p1_packed;
-        V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL,
+        V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL,
                                          (uint8_t *)&p1_packed,
                                          &p1_unpacked);

        uint32_t p2_packed;
-        V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL,
+        V3D42_TMU_CONFIG_PARAMETER_2_pack(NULL,
                                          (uint8_t *)&p2_packed,
                                          &p2_unpacked);

--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@ -155,32 +155,6 @@ vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst)
        return false;
 }

-bool
-vir_writes_r3_implicitly(const struct v3d_device_info *devinfo,
-                         struct qinst *inst)
-{
-        if (!devinfo->has_accumulators)
-                return false;
-
-        for (int i = 0; i < vir_get_nsrc(inst); i++) {
-                switch (inst->src[i].file) {
-                case QFILE_VPM:
-                        return true;
-                default:
-                        break;
-                }
-        }
-
-        if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
-                                  inst->qpu.sig.ldtlb ||
-                                  inst->qpu.sig.ldtlbu ||
-                                  inst->qpu.sig.ldvpm)) {
-                return true;
-        }
-
-        return false;
-}
-
 bool
 vir_writes_r4_implicitly(const struct v3d_device_info *devinfo,
                         struct qinst *inst)
@ -203,9 +177,6 @@ vir_writes_r4_implicitly(const struct v3d_device_info *devinfo,
                break;
        }

-        if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
-                return true;
-
        return false;
 }

--- a/src/broadcom/compiler/vir_dump.c
+++ b/src/broadcom/compiler/vir_dump.c
@ -182,11 +182,6 @@ vir_print_reg(struct v3d_compile *c, const struct qinst *inst,
                break;
        }

-        case QFILE_VPM:
-                fprintf(stderr, "vpm%d.%d",
-                        reg.index / 4, reg.index % 4);
-                break;
-
        case QFILE_TEMP:
                fprintf(stderr, "t%d", reg.index);
                break;
@ -197,9 +192,6 @@ static void
 vir_dump_sig_addr(const struct v3d_device_info *devinfo,
                  const struct v3d_qpu_instr *instr)
 {
-        if (devinfo->ver < 41)
-                return;
-
        if (!instr->sig_magic)
                fprintf(stderr, ".rf%d", instr->sig_addr);
        else {
--- a/src/broadcom/compiler/vir_opt_copy_propagate.c
+++ b/src/broadcom/compiler/vir_opt_copy_propagate.c
@ -62,7 +62,7 @@ is_copy_mov(const struct v3d_device_info *devinfo, struct qinst *inst)
                return false;
        }

-        if (devinfo->ver <= 42) {
+        if (devinfo->ver == 42) {
                switch (inst->src[0].file) {
                case QFILE_MAGIC:
                        /* No copy propagating from R3/R4/R5 -- the MOVs from
--- a/src/broadcom/compiler/vir_opt_dead_code.c
+++ b/src/broadcom/compiler/vir_opt_dead_code.c
@ -51,22 +51,11 @@ dce(struct v3d_compile *c, struct qinst *inst)
        vir_remove_instruction(c, inst);
 }

-static bool
-has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst)
-{
-        for (int i = 0; i < vir_get_nsrc(inst); i++) {
-                if (inst->src[i].file == QFILE_VPM)
-                        return true;
-        }
-
-        return false;
-}
-
 static bool
 can_write_to_null(struct v3d_compile *c, struct qinst *inst)
 {
        /* The SFU instructions must write to a physical register. */
-        if (c->devinfo->ver >= 41 && v3d_qpu_uses_sfu(&inst->qpu))
+        if (v3d_qpu_uses_sfu(&inst->qpu))
                return false;

        return true;
@ -241,7 +230,6 @@ vir_opt_dead_code(struct v3d_compile *c)
                        }

                        if (v3d_qpu_writes_flags(&inst->qpu) ||
-                            has_nonremovable_reads(c, inst) ||
                            (is_ldunifa && !is_first_ldunifa && !is_last_ldunifa)) {
                                /* If we can't remove the instruction, but we
                                 * don't need its destination value, just
--- a/src/broadcom/compiler/vir_opt_small_immediates.c
+++ b/src/broadcom/compiler/vir_opt_small_immediates.c
@ -82,7 +82,7 @@ vir_opt_small_immediates(struct v3d_compile *c)
                         */
                        struct v3d_qpu_sig new_sig = inst->qpu.sig;
                        uint32_t sig_packed;
-                        if (c->devinfo->ver <= 42) {
+                        if (c->devinfo->ver == 42) {
                                new_sig.small_imm_b = true;
                        } else {
                               if (vir_is_add(inst)) {
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@ -942,7 +942,7 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
         * avoid allocating these to registers used by the last instructions
         * in the shader.
         */
-        const uint32_t safe_rf_start = v3d_ra->devinfo->ver <= 42 ? 3 : 4;
+        const uint32_t safe_rf_start = v3d_ra->devinfo->ver == 42 ? 3 : 4;
        if (v3d_ra->nodes->info[node].is_program_end &&
            v3d_ra->next_phys < safe_rf_start) {
                v3d_ra->next_phys = safe_rf_start;
@ -1004,7 +1004,7 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
        /* Allocate up to 3 regfile classes, for the ways the physical
         * register file can be divided up for fragment shader threading.
         */
-        int max_thread_index = (compiler->devinfo->ver >= 40 ? 2 : 3);
+        int max_thread_index = 2;
        uint8_t phys_index = get_phys_index(compiler->devinfo);

        compiler->regs = ra_alloc_reg_set(compiler, phys_index + PHYS_COUNT,
@ -1070,20 +1070,10 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
        int32_t ip = inst->ip;
        assert(ip >= 0);

-        /* If the instruction writes r3/r4 (and optionally moves its
-         * result to a temp), nothing else can be stored in r3/r4 across
+        /* If the instruction writes r4 (and optionally moves its
+         * result to a temp), nothing else can be stored in r4 across
         * it.
         */
-        if (vir_writes_r3_implicitly(c->devinfo, inst)) {
-                for (int i = 0; i < c->num_temps; i++) {
-                        if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
-                                ra_add_node_interference(c->g,
-                                                         temp_to_node(c, i),
-                                                         acc_nodes[3]);
-                        }
-                }
-        }
-
        if (vir_writes_r4_implicitly(c->devinfo, inst)) {
                for (int i = 0; i < c->num_temps; i++) {
                        if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
@ -1207,15 +1197,6 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
                                set_temp_class_bits(c, inst->dst.index,
                                                    class_bits);

-                        } else {
-                                /* Until V3D 4.x, we could only load a uniform
-                                 * to r5, so we'll need to spill if uniform
-                                 * loads interfere with each other.
-                                 */
-                                if (c->devinfo->ver < 40) {
-                                        set_temp_class_bits(c, inst->dst.index,
-                                                            CLASS_BITS_R5);
-                                }
                        }
                } else {
                        /* Make sure we don't allocate the ldvary's
@ -1320,7 +1301,7 @@ v3d_register_allocate(struct v3d_compile *c)
                 * RF0-2. Start at RF4 in 7.x to prevent TLB writes from
                 * using RF2-3.
                 */
-                .next_phys = c->devinfo->ver <= 42 ? 3 : 4,
+                .next_phys = c->devinfo->ver == 42 ? 3 : 4,
                .nodes = &c->nodes,
                .devinfo = c->devinfo,
        };
@ -1333,10 +1314,8 @@ v3d_register_allocate(struct v3d_compile *c)
         * are available at both 1x and 2x threading, and 4x has 32.
         */
        c->thread_index = ffs(c->threads) - 1;
-        if (c->devinfo->ver >= 40) {
-                if (c->thread_index >= 1)
-                        c->thread_index--;
-        }
+        if (c->thread_index >= 1)
+                c->thread_index--;

        c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes);
        ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data);
--- a/src/broadcom/compiler/vir_to_qpu.c
+++ b/src/broadcom/compiler/vir_to_qpu.c
@ -108,7 +108,7 @@ v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src)
 * fields of the instruction.
 */
 static void
-v3d33_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
+v3d42_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
 {
        if (src.smimm) {
                assert(instr->sig.small_imm_b);
@ -158,13 +158,13 @@ set_src(struct v3d_qpu_instr *instr,
        const struct v3d_device_info *devinfo)
 {
        if (devinfo->ver < 71)
-                return v3d33_set_src(instr, mux, src);
+                return v3d42_set_src(instr, mux, src);
        else
                return v3d71_set_src(instr, raddr, src);
 }

 static bool
-v3d33_mov_src_and_dst_equal(struct qinst *qinst)
+v3d42_mov_src_and_dst_equal(struct qinst *qinst)
 {
        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        if (qinst->qpu.alu.mul.magic_write) {
@ -216,7 +216,7 @@ mov_src_and_dst_equal(struct qinst *qinst,
                      const struct v3d_device_info *devinfo)
 {
        if (devinfo->ver < 71)
-                return v3d33_mov_src_and_dst_equal(qinst);
+                return v3d42_mov_src_and_dst_equal(qinst);
        else
                return v3d71_mov_src_and_dst_equal(qinst);
 }
@ -262,8 +262,6 @@ v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
 {
-        int last_vpm_read_index = -1;
-
        vir_for_each_inst_safe(qinst, block) {
 #if 0
                fprintf(stderr, "translating qinst to qpu: ");
@ -271,8 +269,6 @@ v3d_generate_code_block(struct v3d_compile *c,
                fprintf(stderr, "\n");
 #endif

-                struct qinst *temp;
-
                if (vir_has_uniform(qinst))
                        c->num_uniforms++;

@ -303,19 +299,6 @@ v3d_generate_code_block(struct v3d_compile *c,
                        case QFILE_SMALL_IMM:
                                src[i].smimm = true;
                                break;
-
-                        case QFILE_VPM:
-                                assert(c->devinfo->ver < 40);
-                                assert((int)qinst->src[i].index >=
-                                       last_vpm_read_index);
-                                (void)last_vpm_read_index;
-                                last_vpm_read_index = qinst->src[i].index;
-
-                                temp = new_qpu_nop_before(qinst);
-                                temp->qpu.sig.ldvpm = true;
-
-                                src[i] = qpu_magic(V3D_QPU_WADDR_R3);
-                                break;
                        }
                }

@ -337,10 +320,6 @@ v3d_generate_code_block(struct v3d_compile *c,
                        dst = temp_registers[qinst->dst.index];
                        break;

-                case QFILE_VPM:
-                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
-                        break;
-
                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
@ -361,8 +340,6 @@ v3d_generate_code_block(struct v3d_compile *c,
                                }

                                if (use_rf) {
-                                        assert(c->devinfo->ver >= 40);
-
                                        if (qinst->qpu.sig.ldunif) {
                                           qinst->qpu.sig.ldunif = false;
                                           qinst->qpu.sig.ldunifrf = true;
@ -470,11 +447,7 @@ v3d_dump_qpu(struct v3d_compile *c)
                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);

-                /* We can only do this on 4.x, because we're not tracking TMU
-                 * implicit uniforms here on 3.x.
-                 */
-                if (c->devinfo->ver >= 40 &&
-                    reads_uniform(c->devinfo, c->qpu_insts[i])) {
+                if (reads_uniform(c->devinfo, c->qpu_insts[i])) {
                        fprintf(stderr, " (");
                        vir_dump_uniform(c->uniform_contents[next_uniform],
                                         c->uniform_data[next_uniform]);
@ -486,8 +459,7 @@ v3d_dump_qpu(struct v3d_compile *c)
        }

        /* Make sure our dumping lined up. */
-        if (c->devinfo->ver >= 40)
-                assert(next_uniform == c->num_uniforms);
+        assert(next_uniform == c->num_uniforms);

        fprintf(stderr, "\n");
 }
--- a/src/broadcom/meson.build
+++ b/src/broadcom/meson.build
@ -22,7 +22,7 @@ inc_broadcom = include_directories('.', 'cle')

 subdir('cle')

-v3d_versions = ['33', '41', '42', '71']
+v3d_versions = ['42', '71']
 v3d_libs = []

 if with_gallium_v3d or with_broadcom_vk
--- a/src/broadcom/simulator/v3d_simulator.h
+++ b/src/broadcom/simulator/v3d_simulator.h
@ -45,11 +45,7 @@ uint32_t v3d_simulator_get_mem_free(void);
 #ifdef v3dX
 #  include "v3dx_simulator.h"
 #else
-#  define v3dX(x) v3d33_##x
-#  include "v3dx_simulator.h"
-#  undef v3dX
-
-#  define v3dX(x) v3d41_##x
+#  define v3dX(x) v3d42_##x
 #  include "v3dx_simulator.h"
 #  undef v3dX

@ -61,15 +57,10 @@ uint32_t v3d_simulator_get_mem_free(void);

 /* Helper to call simulator ver specific functions */
 #define v3d_X_simulator(thing) ({                     \
-   __typeof(&v3d33_simulator_##thing) v3d_X_sim_thing;\
+   __typeof(&v3d42_simulator_##thing) v3d_X_sim_thing;\
   switch (sim_state.ver) {                           \
-   case 33:                                           \
-   case 40:                                           \
-      v3d_X_sim_thing = &v3d33_simulator_##thing;     \
-      break;                                          \
-   case 41:                                           \
   case 42:                                           \
-      v3d_X_sim_thing = &v3d41_simulator_##thing;     \
+      v3d_X_sim_thing = &v3d42_simulator_##thing;     \
      break;                                          \
   case 71:                                           \
      v3d_X_sim_thing = &v3d71_simulator_##thing;     \
--- a/src/broadcom/simulator/v3dx_simulator.c
+++ b/src/broadcom/simulator/v3dx_simulator.c
@ -51,27 +51,14 @@
 #if V3D_VERSION == 71
 #include "libs/core/v3d/registers/7.1.6.0/v3d.h"
 #else
-#if V3D_VERSION == 41 || V3D_VERSION == 42
+#if V3D_VERSION == 42
 #include "libs/core/v3d/registers/4.2.14.0/v3d.h"
-#else
-#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
 #endif
 #endif

 #define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
 #define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)

-static void
-v3d_invalidate_l3(struct v3d_hw *v3d)
-{
-#if V3D_VERSION < 40
-        uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);
-
-        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
-        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
-#endif
-}
-
 /* Invalidates the L2C cache.  This is a read-only cache for uniforms and instructions. */
 static void
 v3d_invalidate_l2c(struct v3d_hw *v3d)
@ -156,7 +143,6 @@ v3d_invalidate_slices(struct v3d_hw *v3d)
 static void
 v3d_invalidate_caches(struct v3d_hw *v3d)
 {
-        v3d_invalidate_l3(v3d);
        v3d_invalidate_l2c(v3d);
        v3d_invalidate_l2t(v3d);
        v3d_invalidate_slices(v3d);
@ -225,7 +211,7 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_csd *args,
                                 uint32_t gmp_ofs)
 {
-#if V3D_VERSION >= 41
+#if V3D_VERSION >= 42
        int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) &
                                   V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET);
        g_gmp_ofs = gmp_ofs;
@ -282,13 +268,13 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CSD:
-                args->value = V3D_VERSION >= 41;
+                args->value = V3D_VERSION >= 42;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_PERFMON:
-                args->value = V3D_VERSION >= 41;
+                args->value = V3D_VERSION >= 42;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
                args->value = 1;
@ -359,8 +345,7 @@ handle_mmu_interruptions(struct v3d_hw *v3d,
        uint32_t axi_id = V3D_READ(V3D_MMU_VIO_ID);
        uint32_t va_width = 30;

-#if V3D_VERSION >= 41
-        static const char *const v3d41_axi_ids[] = {
+        static const char *const v3d42_axi_ids[] = {
                "L2T",
                "PTB",
                "PSE",
@ -372,14 +357,14 @@ handle_mmu_interruptions(struct v3d_hw *v3d,
        };

        axi_id = axi_id >> 5;
-        if (axi_id < ARRAY_SIZE(v3d41_axi_ids))
-                client = v3d41_axi_ids[axi_id];
+        if (axi_id < ARRAY_SIZE(v3d42_axi_ids))
+                client = v3d42_axi_ids[axi_id];

        uint32_t mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO);

        va_width += ((mmu_debug & V3D_MMU_DEBUG_INFO_VA_WIDTH_SET)
                     >> V3D_MMU_DEBUG_INFO_VA_WIDTH_LSB);
-#endif
+
        /* Only the top bits (final number depends on the gen) of the virtual
         * address are reported in the MMU VIO_ADDR register.
         */
@ -454,18 +439,6 @@ v3d_isr(uint32_t hub_status)
 void
 v3dX(simulator_init_regs)(struct v3d_hw *v3d)
 {
-#if V3D_VERSION == 33
-        /* Set OVRTMUOUT to match kernel behavior.
-         *
-         * This means that the texture sampler uniform configuration's tmu
-         * output type field is used, instead of using the hardware default
-         * behavior based on the texture type.  If you want the default
-         * behavior, you can still put "2" in the indirect texture state's
-         * output_type field.
-         */
-        V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
-#endif
-
        /* FIXME: the kernel captures some additional core interrupts here,
         * for tracing. Perhaps we should evaluate to do the same here and add
         * some debug options.
@ -514,13 +487,11 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
                V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
                V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
        }
-#if V3D_VERSION >= 41
        if (submit->qts) {
                V3D_WRITE(V3D_CLE_0_CT0QTS,
                          V3D_CLE_0_CT0QTS_CTQTSEN_SET |
                          submit->qts);
        }
-#endif
        V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
        V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);

@ -544,21 +515,18 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
        }
 }

-#if V3D_VERSION >= 41
 #define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x))
 #define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x))
 #define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8)
 #define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \
                                                 V3D_PCTR_0_SRC_N_SHIFT(x) + \
                                                 V3D_PCTR_0_SRC_0_3_PCTRS0_MSB))
-#endif

 void
 v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
                              uint32_t ncounters,
                              uint8_t *events)
 {
-#if V3D_VERSION >= 41
        int i, j;
        uint32_t source;
        uint32_t mask = BITFIELD_RANGE(0, ncounters);
@ -573,21 +541,18 @@ v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
        V3D_WRITE(V3D_PCTR_0_CLR, mask);
        V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask);
        V3D_WRITE(V3D_PCTR_0_EN, mask);
-#endif
 }

 void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
                                  uint32_t ncounters,
                                  uint64_t *values)
 {
-#if V3D_VERSION >= 41
        int i;

        for (i = 0; i < ncounters; i++)
                values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i));

        V3D_WRITE(V3D_PCTR_0_EN, 0);
-#endif
 }

 void v3dX(simulator_get_perfcnt_total)(uint32_t *count)
--- a/src/broadcom/vulkan/v3dv_cl.c
+++ b/src/broadcom/vulkan/v3dv_cl.c
@ -27,7 +27,7 @@
 * versions, so we just explicitly set the V3D_VERSION and include v3dx_pack
 * here
 */
-#define V3D_VERSION 33
+#define V3D_VERSION 42
 #include "broadcom/common/v3d_macros.h"
 #include "broadcom/cle/v3dx_pack.h"

--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@ -618,10 +618,10 @@ struct v3dv_device_memory {

 #define V3DV_MAX_PLANE_COUNT 3
 struct v3dv_format_plane {
-   /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
+   /* One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
   uint8_t rt_type;

-   /* One of V3D33_TEXTURE_DATA_FORMAT_*. */
+   /* One of V3D42_TEXTURE_DATA_FORMAT_*. */
   uint8_t tex_type;

   /* Swizzle to apply to the RGBA shader output for storing to the tile
--- a/src/gallium/drivers/v3d/meson.build
+++ b/src/gallium/drivers/v3d/meson.build
@ -59,7 +59,7 @@ if dep_v3dv3.found()
  v3d_args += '-DUSE_V3D_SIMULATOR'
 endif

-v3d_versions = ['33', '42', '71']
+v3d_versions = ['42', '71']

 per_version_libs = []
 foreach ver : v3d_versions
--- a/src/gallium/drivers/v3d/v3d_blit.c
+++ b/src/gallium/drivers/v3d/v3d_blit.c
@ -309,7 +309,7 @@ v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
        struct v3d_screen *screen = v3d->screen;
        struct v3d_device_info *devinfo = &screen->devinfo;

-        if (devinfo->ver < 40 || !info->mask)
+        if (!info->mask)
                return;

        bool is_color_blit = info->mask & PIPE_MASK_RGBA;
--- a/src/gallium/drivers/v3d/v3d_cl.c
+++ b/src/gallium/drivers/v3d/v3d_cl.c
@ -28,7 +28,7 @@
 * hw versions, so we just explicitly set the V3D_VERSION and include
 * v3dx_pack here
 */
-#define V3D_VERSION 33
+#define V3D_VERSION 42
 #include "broadcom/common/v3d_macros.h"
 #include "broadcom/cle/v3dx_pack.h"

--- a/src/gallium/drivers/v3d/v3d_context.c
+++ b/src/gallium/drivers/v3d/v3d_context.c
@ -300,16 +300,11 @@ v3d_get_sample_position(struct pipe_context *pctx,
                        unsigned sample_count, unsigned sample_index,
                        float *xy)
 {
-        struct v3d_context *v3d = v3d_context(pctx);
-
        if (sample_count <= 1) {
                xy[0] = 0.5;
                xy[1] = 0.5;
        } else {
-                static const int xoffsets_v33[] = { 1, -3, 3, -1 };
-                static const int xoffsets_v42[] = { -1, 3, -3, 1 };
-                const int *xoffsets = (v3d->screen->devinfo.ver >= 42 ?
-                                       xoffsets_v42 : xoffsets_v33);
+                static const int xoffsets[] = { -1, 3, -3, 1 };

                xy[0] = 0.5 + xoffsets[sample_index] * .125;
                xy[1] = .125 + sample_index * .25;
--- a/src/gallium/drivers/v3d/v3d_context.h
+++ b/src/gallium/drivers/v3d/v3d_context.h
@ -825,12 +825,8 @@ void v3d_disk_cache_store(struct v3d_context *v3d,

 /* Helper to call hw ver specific functions */
 #define v3d_X(devinfo, thing) ({                                \
-        __typeof(&v3d33_##thing) v3d_X_thing;                   \
+        __typeof(&v3d42_##thing) v3d_X_thing;                   \
        switch (devinfo->ver) {                                 \
-        case 33:                                                \
-        case 40:                                                \
-                v3d_X_thing = &v3d33_##thing;                   \
-                break;                                          \
        case 42:                                                \
                v3d_X_thing = &v3d42_##thing;                   \
                break;                                          \
@ -846,19 +842,13 @@ void v3d_disk_cache_store(struct v3d_context *v3d,
 /* FIXME: The same for vulkan/opengl. Common place? define it at the
 * v3d_packet files?
 */
-#define V3D33_CLIPPER_XY_GRANULARITY 256.0f
 #define V3D42_CLIPPER_XY_GRANULARITY 256.0f
 #define V3D71_CLIPPER_XY_GRANULARITY 64.0f

 /* Helper to get hw-specific macro values */
 #define V3DV_X(devinfo, thing) ({                               \
-   __typeof(V3D33_##thing) V3D_X_THING;                         \
+   __typeof(V3D42_##thing) V3D_X_THING;                         \
   switch (devinfo->ver) {                                      \
-   case 33:                                                     \
-   case 40:                                                     \
-      V3D_X_THING = V3D33_##thing;                              \
-      break;                                                    \
-      case 41:                                                  \
   case 42:                                                     \
      V3D_X_THING = V3D42_##thing;                              \
      break;                                                    \
@ -874,10 +864,6 @@ void v3d_disk_cache_store(struct v3d_context *v3d,
 #ifdef v3dX
 #  include "v3dx_context.h"
 #else
-#  define v3dX(x) v3d33_##x
-#  include "v3dx_context.h"
-#  undef v3dX
-
 #  define v3dX(x) v3d42_##x
 #  include "v3dx_context.h"
 #  undef v3dX
--- a/src/gallium/drivers/v3d/v3d_format_table.h
+++ b/src/gallium/drivers/v3d/v3d_format_table.h
@ -30,10 +30,10 @@ struct v3d_format {
        /** Set if the pipe format is defined in the table. */
        bool present;

-        /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
+        /** One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
        uint8_t rt_type;

-        /** One of V3D33_TEXTURE_DATA_FORMAT_*. */
+        /** One of V3D42_TEXTURE_DATA_FORMAT_*. */
        uint8_t tex_type;

        /**
--- a/src/gallium/drivers/v3d/v3d_formats.c
+++ b/src/gallium/drivers/v3d/v3d_formats.c
@ -38,7 +38,7 @@
 #include "v3d_format_table.h"

 /* The format internal types are the same across V3D versions */
-#define V3D_VERSION 33
+#define V3D_VERSION 42
 #include "broadcom/cle/v3dx_pack.h"

 bool
--- a/src/gallium/drivers/v3d/v3d_job.c
+++ b/src/gallium/drivers/v3d/v3d_job.c
@ -29,7 +29,7 @@
 #include <xf86drm.h>
 #include "v3d_context.h"
 /* The OQ/semaphore packets are the same across V3D versions. */
-#define V3D_VERSION 33
+#define V3D_VERSION 42
 #include "broadcom/cle/v3dx_pack.h"
 #include "broadcom/common/v3d_macros.h"
 #include "util/hash_table.h"
@ -547,7 +547,7 @@ v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
        /* On V3D 4.1, the tile alloc/state setup moved to register writes
         * instead of binner packets.
         */
-        if (devinfo->ver >= 41) {
+        if (devinfo->ver >= 42) {
                v3d_job_add_bo(job, job->tile_alloc);
                job->submit.qma = job->tile_alloc->offset;
                job->submit.qms = job->tile_alloc->size;
--- a/src/gallium/drivers/v3d/v3d_program.c
+++ b/src/gallium/drivers/v3d/v3d_program.c
@ -35,7 +35,8 @@
 #include "nir/tgsi_to_nir.h"
 #include "compiler/v3d_compiler.h"
 #include "v3d_context.h"
-#include "broadcom/cle/v3d_packet_v33_pack.h"
+/* packets here are the same across V3D versions. */
+#include "broadcom/cle/v3d_packet_v42_pack.h"

 static struct v3d_compiled_shader *
 v3d_get_compiled_shader(struct v3d_context *v3d,
@ -136,7 +137,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so,
                while (vpm_size) {
                        uint32_t write_size = MIN2(vpm_size, 1 << 4);

-                        struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
+                        struct V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
                                /* We need the offset from the coordinate shader's VPM
                                 * output block, which has the [X, Y, Z, W, Xs, Ys]
                                 * values at the start.
@ -151,7 +152,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so,
                               so->num_tf_specs != 0);

                        assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs));
-                        V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
+                        V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
                                                                       (void *)&so->tf_specs[so->num_tf_specs],
                                                                       &unpacked);

@ -166,7 +167,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so,
                        assert(unpacked.first_shaded_vertex_value_to_output != 8 ||
                               so->num_tf_specs != 0);

-                        V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
+                        V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
                                                                       (void *)&so->tf_specs_psiz[so->num_tf_specs],
                                                                       &unpacked);
                        so->num_tf_specs++;
@ -559,7 +560,6 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key,
        assert(key->num_tex_used == key->num_samplers_used);
        for (int i = 0; i < texstate->num_textures; i++) {
                struct pipe_sampler_view *sampler = texstate->textures[i];
-                struct v3d_sampler_view *v3d_sampler = v3d_sampler_view(sampler);

                if (!sampler)
                        continue;
@ -573,27 +573,16 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key,
                 */
                if (key->sampler[i].return_size == 16) {
                        key->sampler[i].return_channels = 2;
-                } else if (devinfo->ver > 40) {
-                        key->sampler[i].return_channels = 4;
                } else {
-                        key->sampler[i].return_channels =
-                                v3d_get_tex_return_channels(devinfo,
-                                                            sampler->format);
+                        key->sampler[i].return_channels = 4;
                }

-                if (key->sampler[i].return_size == 32 && devinfo->ver < 40) {
-                        memcpy(key->tex[i].swizzle,
-                               v3d_sampler->swizzle,
-                               sizeof(v3d_sampler->swizzle));
-                } else {
-                        /* For 16-bit returns, we let the sampler state handle
-                         * the swizzle.
-                         */
-                        key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
-                        key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
-                        key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
-                        key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
-                }
+                /* We let the sampler state handle the swizzle.
+                 */
+                key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
+                key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
+                key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
+                key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
        }
 }

--- a/src/gallium/drivers/v3d/v3d_resource.c
+++ b/src/gallium/drivers/v3d/v3d_resource.c
@ -36,7 +36,8 @@
 #include "v3d_screen.h"
 #include "v3d_context.h"
 #include "v3d_resource.h"
-#include "broadcom/cle/v3d_packet_v33_pack.h"
+/* The packets used here are the same across V3D versions. */
+#include "broadcom/cle/v3d_packet_v42_pack.h"

 static void
 v3d_debug_resource_layout(struct v3d_resource *rsc, const char *caller)
@ -747,8 +748,6 @@ static struct v3d_resource *
 v3d_resource_setup(struct pipe_screen *pscreen,
                   const struct pipe_resource *tmpl)
 {
-        struct v3d_screen *screen = v3d_screen(pscreen);
-        struct v3d_device_info *devinfo = &screen->devinfo;
        struct v3d_resource *rsc = CALLOC_STRUCT(v3d_resource);

        if (!rsc)
@ -760,34 +759,7 @@ v3d_resource_setup(struct pipe_screen *pscreen,
        pipe_reference_init(&prsc->reference, 1);
        prsc->screen = pscreen;

-        if (prsc->nr_samples <= 1 ||
-            devinfo->ver >= 40 ||
-            util_format_is_depth_or_stencil(prsc->format)) {
-                rsc->cpp = util_format_get_blocksize(prsc->format);
-                if (devinfo->ver < 40 && prsc->nr_samples > 1)
-                        rsc->cpp *= prsc->nr_samples;
-        } else {
-                assert(v3d_rt_format_supported(devinfo, prsc->format));
-                uint32_t output_image_format =
-                        v3d_get_rt_format(devinfo, prsc->format);
-                uint32_t internal_type;
-                uint32_t internal_bpp;
-                v3d_X(devinfo, get_internal_type_bpp_for_output_format)
-                   (output_image_format, &internal_type, &internal_bpp);
-
-                switch (internal_bpp) {
-                case V3D_INTERNAL_BPP_32:
-                        rsc->cpp = 4;
-                        break;
-                case V3D_INTERNAL_BPP_64:
-                        rsc->cpp = 8;
-                        break;
-                case V3D_INTERNAL_BPP_128:
-                        rsc->cpp = 16;
-                        break;
-                }
-        }
-
+        rsc->cpp = util_format_get_blocksize(prsc->format);
        rsc->serial_id++;

        assert(rsc->cpp);
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@ -153,7 +153,7 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
                return 1;

        case PIPE_CAP_POLYGON_OFFSET_CLAMP:
-                return screen->devinfo.ver >= 41;
+                return screen->devinfo.ver >= 42;


        case PIPE_CAP_TEXTURE_QUERY_LOD:
@ -182,20 +182,18 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
                return PIPE_TEXTURE_TRANSFER_BLIT;

        case PIPE_CAP_COMPUTE:
-                return screen->has_csd && screen->devinfo.ver >= 41;
+                return screen->has_csd && screen->devinfo.ver >= 42;

        case PIPE_CAP_GENERATE_MIPMAP:
                return v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_TFU);

        case PIPE_CAP_INDEP_BLEND_ENABLE:
-                return screen->devinfo.ver >= 40;
+                return 1;

        case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
                return V3D_NON_COHERENT_ATOM_SIZE;

        case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
-                if (screen->devinfo.ver < 40)
-                        return 0;
                return 4;

        case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
@ -218,15 +216,9 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT:
                return 0;
        case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
-                if (screen->devinfo.ver >= 40)
-                        return 0;
-                else
-                        return 1;
+                return 0;
        case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
-                if (screen->devinfo.ver >= 40)
-                        return 1;
-                else
-                        return 0;
+                return 1;

        case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
        case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
@ -240,18 +232,13 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)

                /* Texturing. */
        case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
-                if (screen->devinfo.ver < 40)
-                        return 2048;
-                else if (screen->nonmsaa_texture_size_limit)
+                if (screen->nonmsaa_texture_size_limit)
                        return 7680;
                else
                        return V3D_MAX_IMAGE_DIMENSION;
        case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
        case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
-                if (screen->devinfo.ver < 40)
-                        return 12;
-                else
-                        return V3D_MAX_MIP_LEVELS;
+                return V3D_MAX_MIP_LEVELS;
        case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
                return V3D_MAX_ARRAY_LAYERS;

@ -361,7 +348,7 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type s
                        return 0;
                break;
        case PIPE_SHADER_GEOMETRY:
-                if (screen->devinfo.ver < 41)
+                if (screen->devinfo.ver < 42)
                        return 0;
                break;
        default:
@ -454,7 +441,7 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type s

        case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
                if (screen->has_cache_flush) {
-                        if (screen->devinfo.ver < 41)
+                        if (screen->devinfo.ver < 42)
                                return 0;
                        else
                                return PIPE_MAX_SHADER_IMAGES;
--- a/src/gallium/drivers/v3d/v3d_uniforms.c
+++ b/src/gallium/drivers/v3d/v3d_uniforms.c
@ -28,9 +28,9 @@
 #include "compiler/v3d_compiler.h"

 /* We don't expect that the packets we use in this file change across across
- * hw versions, so we just include directly the v33 header
+ * hw versions, so we just include directly the v42 header
 */
-#include "broadcom/cle/v3d_packet_v33_pack.h"
+#include "broadcom/cle/v3d_packet_v42_pack.h"

 static uint32_t
 get_texrect_scale(struct v3d_texture_stateobj *texstate,
@ -124,54 +124,6 @@ get_image_size(struct v3d_shaderimg_stateobj *shaderimg,
        }
 }

-/**
- *  Writes the V3D 3.x P0 (CFG_MODE=1) texture parameter.
- *
- * Some bits of this field are dependent on the type of sample being done by
- * the shader, while other bits are dependent on the sampler state.  We OR the
- * two together here.
- */
-static void
-write_texture_p0(struct v3d_job *job,
-                 struct v3d_cl_out **uniforms,
-                 struct v3d_texture_stateobj *texstate,
-                 uint32_t unit,
-                 uint32_t shader_data)
-{
-        struct pipe_sampler_state *psampler = texstate->samplers[unit];
-        struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
-
-        cl_aligned_u32(uniforms, shader_data | sampler->p0);
-}
-
-/** Writes the V3D 3.x P1 (CFG_MODE=1) texture parameter. */
-static void
-write_texture_p1(struct v3d_job *job,
-                 struct v3d_cl_out **uniforms,
-                 struct v3d_texture_stateobj *texstate,
-                 uint32_t data)
-{
-        /* Extract the texture unit from the top bits, and the compiler's
-         * packed p1 from the bottom.
-         */
-        uint32_t unit = data >> 5;
-        uint32_t p1 = data & 0x1f;
-
-        struct pipe_sampler_view *psview = texstate->textures[unit];
-        struct v3d_sampler_view *sview = v3d_sampler_view(psview);
-
-        struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = {
-                .texture_state_record_base_address = texstate->texture_state[unit],
-        };
-
-        uint32_t packed;
-        V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect,
-                                                         (uint8_t *)&packed,
-                                                         &unpacked);
-
-        cl_aligned_u32(uniforms, p1 | packed | sview->p1);
-}
-
 /** Writes the V3D 4.x TMU configuration parameter 0. */
 static void
 write_tmu_p0(struct v3d_job *job,
@ -328,11 +280,6 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job,
                                           &v3d->shaderimg[stage], data);
                        break;

-                case QUNIFORM_TEXTURE_CONFIG_P1:
-                        write_texture_p1(job, &uniforms, texstate,
-                                         data);
-                        break;
-
                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                        cl_aligned_u32(&uniforms,
@ -437,13 +384,7 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job,
                        break;

                default:
-                        assert(quniform_contents_is_texture_p0(uinfo->contents[i]));
-
-                        write_texture_p0(job, &uniforms, texstate,
-                                         uinfo->contents[i] -
-                                         QUNIFORM_TEXTURE_CONFIG_P0_0,
-                                         data);
-                        break;
+                        unreachable("Unknown QUNIFORM");

                }
 #if 0
--- a/src/gallium/drivers/v3d/v3dx_draw.c
+++ b/src/gallium/drivers/v3d/v3dx_draw.c
@ -75,7 +75,7 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job)

        job->tile_alloc = v3d_bo_alloc(v3d->screen, tile_alloc_size,
                                       "tile_alloc");
-        uint32_t tsda_per_tile_size = v3d->screen->devinfo.ver >= 40 ? 256 : 64;
+        uint32_t tsda_per_tile_size = 256;
        job->tile_state = v3d_bo_alloc(v3d->screen,
                                       MAX2(job->num_layers, 1) *
                                       job->draw_tiles_y *
@ -83,7 +83,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job)
                                       tsda_per_tile_size,
                                       "TSDA");

-#if V3D_VERSION >= 41
        /* This must go before the binning mode configuration. It is
         * required for layered framebuffers to work.
         */
@ -92,7 +91,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job)
                        config.number_of_layers = job->num_layers;
                }
        }
-#endif

        assert(!job->msaa || !job->double_buffer);
 #if V3D_VERSION >= 71
@ -113,7 +111,7 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job)

 #endif

-#if V3D_VERSION >= 40 && V3D_VERSION <= 42
+#if V3D_VERSION == 42
        cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
                config.width_in_pixels = job->draw_width;
                config.height_in_pixels = job->draw_height;
@ -126,34 +124,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job)
                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
        }
 #endif
-#if V3D_VERSION < 40
-        /* "Binning mode lists start with a Tile Binning Mode Configuration
-         * item (120)"
-         *
-         * Part1 signals the end of binning config setup.
-         */
-        cl_emit(&job->bcl, TILE_BINNING_MODE_CFG_PART2, config) {
-                config.tile_allocation_memory_address =
-                        cl_address(job->tile_alloc, 0);
-                config.tile_allocation_memory_size = job->tile_alloc->size;
-        }
-
-        cl_emit(&job->bcl, TILE_BINNING_MODE_CFG_PART1, config) {
-                config.tile_state_data_array_base_address =
-                        cl_address(job->tile_state, 0);
-
-                config.width_in_tiles = job->draw_tiles_x;
-                config.height_in_tiles = job->draw_tiles_y;
-                /* Must be >= 1 */
-                config.number_of_render_targets =
-                        MAX2(job->nr_cbufs, 1);
-
-                config.multisample_mode_4x = job->msaa;
-                config.double_buffer_in_non_ms_mode = job->double_buffer;
-
-                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
-        }
-#endif

        /* There's definitely nothing in the VCD cache we want. */
        cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
@ -380,7 +350,6 @@ v3d_emit_wait_for_tf_if_needed(struct v3d_context *v3d, struct v3d_job *job)
        }
 }

-#if V3D_VERSION >= 41
 static void
 v3d_emit_gs_state_record(struct v3d_job *job,
                         struct v3d_compiled_shader *gs_bin,
@ -396,7 +365,7 @@ v3d_emit_gs_state_record(struct v3d_job *job,
                        gs_bin->prog_data.gs->base.threads == 4;
                shader.geometry_bin_mode_shader_start_in_final_thread_section =
                        gs_bin->prog_data.gs->base.single_seg;
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                shader.geometry_bin_mode_shader_propagate_nans = true;
 #endif
                shader.geometry_bin_mode_shader_uniforms_address =
@ -408,7 +377,7 @@ v3d_emit_gs_state_record(struct v3d_job *job,
                        gs->prog_data.gs->base.threads == 4;
                shader.geometry_render_mode_shader_start_in_final_thread_section =
                        gs->prog_data.gs->base.single_seg;
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                shader.geometry_render_mode_shader_propagate_nans = true;
 #endif
                shader.geometry_render_mode_shader_uniforms_address =
@ -500,7 +469,6 @@ v3d_emit_tes_gs_shader_params(struct v3d_job *job,
                shader.gbg_min_gs_output_segments_required_in_play = 1;
        }
 }
-#endif

 static void
 v3d_emit_gl_shader_state(struct v3d_context *v3d,
@ -559,14 +527,12 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,

        uint32_t shader_state_record_length =
                cl_packet_length(GL_SHADER_STATE_RECORD);
-#if V3D_VERSION >= 41
        if (v3d->prog.gs) {
                shader_state_record_length +=
                        cl_packet_length(GEOMETRY_SHADER_STATE_RECORD) +
                        cl_packet_length(TESSELLATION_GEOMETRY_COMMON_PARAMS) +
                        2 * cl_packet_length(TESSELLATION_GEOMETRY_SHADER_PARAMS);
        }
-#endif

        /* See GFXH-930 workaround below */
        uint32_t shader_rec_offset =
@ -582,8 +548,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
         */

        struct vpm_config vpm_cfg_bin, vpm_cfg;
-
-        assert(v3d->screen->devinfo.ver >= 41 || !v3d->prog.gs);
        v3d_compute_vpm_config(&v3d->screen->devinfo,
                               v3d->prog.cs->prog_data.vs,
                               v3d->prog.vs->prog_data.vs,
@ -593,7 +557,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                               &vpm_cfg);

        if (v3d->prog.gs) {
-#if V3D_VERSION >= 41
                v3d_emit_gs_state_record(v3d->job,
                                         v3d->prog.gs_bin, gs_bin_uniforms,
                                         v3d->prog.gs, gs_uniforms);
@ -614,9 +577,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                                              vpm_cfg.gs_width,
                                              vpm_cfg.Gd,
                                              vpm_cfg.Gv);
-#else
-                unreachable("No GS support pre-4.1");
-#endif
        }

        cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
@ -643,20 +603,16 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
                        v3d->prog.fs->prog_data.fs->uses_center_w;

-#if V3D_VERSION >= 41
                shader.any_shader_reads_hardware_written_primitive_id =
                        (v3d->prog.gs && v3d->prog.gs->prog_data.gs->uses_pid) ||
                        v3d->prog.fs->prog_data.fs->uses_pid;
                shader.insert_primitive_id_as_first_varying_to_fragment_shader =
                        !v3d->prog.gs && v3d->prog.fs->prog_data.fs->uses_pid;
-#endif

-#if V3D_VERSION >= 40
-               shader.do_scoreboard_wait_on_first_thread_switch =
+                shader.do_scoreboard_wait_on_first_thread_switch =
                        v3d->prog.fs->prog_data.fs->lock_scoreboard_on_first_thrsw;
-               shader.disable_implicit_point_line_varyings =
+                shader.disable_implicit_point_line_varyings =
                        !v3d->prog.fs->prog_data.fs->uses_implicit_point_line_varyings;
-#endif

                shader.number_of_varyings_in_fragment_shader =
                        v3d->prog.fs->prog_data.fs->num_inputs;
@ -671,7 +627,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                        cl_address(v3d_resource(v3d->prog.fs->resource)->bo,
                                   v3d->prog.fs->offset);

-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                shader.coordinate_shader_propagate_nans = true;
                shader.vertex_shader_propagate_nans = true;
                shader.fragment_shader_propagate_nans = true;
@ -711,7 +667,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                shader.vertex_shader_uniforms_address = vs_uniforms;
                shader.fragment_shader_uniforms_address = fs_uniforms;

-#if V3D_VERSION >= 41
                shader.min_coord_shader_input_segments_required_in_play =
                        vpm_cfg_bin.As;
                shader.min_vertex_shader_input_segments_required_in_play =
@ -735,20 +690,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                        v3d->prog.vs->prog_data.vs->base.single_seg;
                shader.fragment_shader_start_in_final_thread_section =
                        v3d->prog.fs->prog_data.fs->base.single_seg;
-#else
-                shader.coordinate_shader_4_way_threadable =
-                        v3d->prog.cs->prog_data.vs->base.threads == 4;
-                shader.coordinate_shader_2_way_threadable =
-                        v3d->prog.cs->prog_data.vs->base.threads == 2;
-                shader.vertex_shader_4_way_threadable =
-                        v3d->prog.vs->prog_data.vs->base.threads == 4;
-                shader.vertex_shader_2_way_threadable =
-                        v3d->prog.vs->prog_data.vs->base.threads == 2;
-                shader.fragment_shader_4_way_threadable =
-                        v3d->prog.fs->prog_data.fs->base.threads == 4;
-                shader.fragment_shader_2_way_threadable =
-                        v3d->prog.fs->prog_data.fs->base.threads == 2;
-#endif

                shader.vertex_id_read_by_coordinate_shader =
                        v3d->prog.cs->prog_data.vs->uses_vid;
@ -759,7 +700,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                shader.instance_id_read_by_vertex_shader =
                        v3d->prog.vs->prog_data.vs->uses_iid;

-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                shader.address_of_default_attribute_values =
                        cl_address(v3d_resource(vtx->defaults)->bo,
                                   vtx->defaults_offset);
@ -802,9 +743,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                        if (i == vtx->num_elements - 1 && !cs_loaded_any) {
                                attr.number_of_values_read_by_coordinate_shader = 1;
                        }
-#if V3D_VERSION >= 41
                        attr.maximum_index = 0xffffff;
-#endif
                }
                STATIC_ASSERT(sizeof(vtx->attrs) >= V3D_MAX_VS_INPUTS / 4 * size);
        }
@ -833,7 +772,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                vcm.number_of_16_vertex_batches_for_rendering = vpm_cfg.Vc;
        }

-#if V3D_VERSION >= 41
        if (v3d->prog.gs) {
                cl_emit(&job->bcl, GL_SHADER_STATE_INCLUDING_GS, state) {
                        state.address = cl_address(job->indirect.bo,
@ -847,13 +785,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                        state.number_of_attribute_arrays = num_elements_to_emit;
                }
        }
-#else
-        assert(!v3d->prog.gs);
-        cl_emit(&job->bcl, GL_SHADER_STATE, state) {
-                state.address = cl_address(job->indirect.bo, shader_rec_offset);
-                state.number_of_attribute_arrays = num_elements_to_emit;
-        }
-#endif

        v3d_bo_unreference(&cs_uniforms.bo);
        v3d_bo_unreference(&vs_uniforms.bo);
@ -1164,13 +1095,6 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
        }

        uint32_t prim_tf_enable = 0;
-#if V3D_VERSION < 40
-        /* V3D 3.x: The HW only processes transform feedback on primitives
-         * with the flag set.
-         */
-        if (v3d->streamout.num_targets)
-                prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS);
-#endif

        v3d->prim_restart = info->primitive_restart;

@ -1194,20 +1118,14 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
                }
                struct v3d_resource *rsc = v3d_resource(prsc);

-#if V3D_VERSION >= 40
                cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) {
                        ib.address = cl_address(rsc->bo, 0);
                        ib.size = rsc->bo->size;
                }
-#endif

                if (indirect && indirect->buffer) {
                        cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) {
                                prim.index_type = ffs(info->index_size) - 1;
-#if V3D_VERSION < 40
-                                prim.address_of_indices_list =
-                                        cl_address(rsc->bo, offset);
-#endif /* V3D_VERSION < 40 */
                                prim.mode = hw_prim_type | prim_tf_enable;
                                prim.enable_primitive_restarts = info->primitive_restart;

@ -1220,13 +1138,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
                } else if (info->instance_count > 1) {
                        cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) {
                                prim.index_type = ffs(info->index_size) - 1;
-#if V3D_VERSION >= 40
                                prim.index_offset = offset;
-#else /* V3D_VERSION < 40 */
-                                prim.maximum_index = (1u << 31) - 1; /* XXX */
-                                prim.address_of_indices_list =
-                                        cl_address(rsc->bo, offset);
-#endif /* V3D_VERSION < 40 */
                                prim.mode = hw_prim_type | prim_tf_enable;
                                prim.enable_primitive_restarts = info->primitive_restart;

@ -1237,13 +1149,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
                        cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) {
                                prim.index_type = ffs(info->index_size) - 1;
                                prim.length = draws[0].count;
-#if V3D_VERSION >= 40
                                prim.index_offset = offset;
-#else /* V3D_VERSION < 40 */
-                                prim.maximum_index = (1u << 31) - 1; /* XXX */
-                                prim.address_of_indices_list =
-                                        cl_address(rsc->bo, offset);
-#endif /* V3D_VERSION < 40 */
                                prim.mode = hw_prim_type | prim_tf_enable;
                                prim.enable_primitive_restarts = info->primitive_restart;
                        }
@ -1361,7 +1267,6 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
                v3d_flush(pctx);
 }

-#if V3D_VERSION >= 41
 #define V3D_CSD_CFG012_WG_COUNT_SHIFT 16
 #define V3D_CSD_CFG012_WG_OFFSET_SHIFT 0
 /* Allow this dispatch to start while the last one is still running. */
@ -1563,7 +1468,6 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
        v3d_bo_unreference(&uniforms.bo);
        v3d_bo_unreference(&v3d->compute_shared_memory);
 }
-#endif

 /**
 * Implements gallium's clear() hook (glClear()) by drawing a pair of triangles.
@ -1607,7 +1511,7 @@ v3d_tlb_clear(struct v3d_job *job, unsigned buffers,
         * if it would be possible to need to emit a load of just one after
         * we've set up our TLB clears. This issue is fixed since V3D 4.3.18.
         */
-        if (v3d->screen->devinfo.ver <= 42 &&
+        if (v3d->screen->devinfo.ver == 42 &&
            buffers & PIPE_CLEAR_DEPTHSTENCIL &&
            (buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL &&
            job->zsbuf &&
@ -1762,8 +1666,6 @@ v3dX(draw_init)(struct pipe_context *pctx)
        pctx->clear = v3d_clear;
        pctx->clear_render_target = v3d_clear_render_target;
        pctx->clear_depth_stencil = v3d_clear_depth_stencil;
-#if V3D_VERSION >= 41
        if (v3d_context(pctx)->screen->has_csd)
                pctx->launch_grid = v3d_launch_grid;
-#endif
 }
--- a/src/gallium/drivers/v3d/v3dx_emit.c
+++ b/src/gallium/drivers/v3d/v3dx_emit.c
@ -78,172 +78,6 @@ v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
        }
 }

-#if V3D_VERSION < 40
-static inline uint16_t
-swizzled_border_color(const struct v3d_device_info *devinfo,
-                      struct pipe_sampler_state *sampler,
-                      struct v3d_sampler_view *sview,
-                      int chan)
-{
-        const struct util_format_description *desc =
-                util_format_description(sview->base.format);
-        uint8_t swiz = chan;
-
-        /* If we're doing swizzling in the sampler, then only rearrange the
-         * border color for the mismatch between the V3D texture format and
-         * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
-         * the sampler's swizzle.
-         *
-         * For swizzling in the shader, we don't do any pre-swizzling of the
-         * border color.
-         */
-        if (v3d_get_tex_return_size(devinfo, sview->base.format) != 32)
-                swiz = desc->swizzle[swiz];
-
-        switch (swiz) {
-        case PIPE_SWIZZLE_0:
-                return _mesa_float_to_half(0.0);
-        case PIPE_SWIZZLE_1:
-                return _mesa_float_to_half(1.0);
-        default:
-                return _mesa_float_to_half(sampler->border_color.f[swiz]);
-        }
-}
-
-static void
-emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex,
-                 int i)
-{
-        struct v3d_job *job = v3d->job;
-        struct pipe_sampler_state *psampler = stage_tex->samplers[i];
-        struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
-        struct pipe_sampler_view *psview = stage_tex->textures[i];
-        struct v3d_sampler_view *sview = v3d_sampler_view(psview);
-        struct pipe_resource *prsc = psview->texture;
-        struct v3d_resource *rsc = v3d_resource(prsc);
-        const struct v3d_device_info *devinfo = &v3d->screen->devinfo;
-
-        stage_tex->texture_state[i].offset =
-                v3d_cl_ensure_space(&job->indirect,
-                                    cl_packet_length(TEXTURE_SHADER_STATE),
-                                    32);
-        v3d_bo_set_reference(&stage_tex->texture_state[i].bo,
-                             job->indirect.bo);
-
-        uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format);
-
-        struct V3D33_TEXTURE_SHADER_STATE unpacked = {
-                /* XXX */
-                .border_color_red = swizzled_border_color(devinfo, psampler,
-                                                          sview, 0),
-                .border_color_green = swizzled_border_color(devinfo, psampler,
-                                                            sview, 1),
-                .border_color_blue = swizzled_border_color(devinfo, psampler,
-                                                           sview, 2),
-                .border_color_alpha = swizzled_border_color(devinfo, psampler,
-                                                            sview, 3),
-
-                /* In the normal texturing path, the LOD gets clamped between
-                 * min/max, and the base_level field (set in the sampler view
-                 * from first_level) only decides where the min/mag switch
-                 * happens, so we need to use the LOD clamps to keep us
-                 * between min and max.
-                 *
-                 * For txf, the LOD clamp is still used, despite GL not
-                 * wanting that.  We will need to have a separate
-                 * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
-                 * support txf properly.
-                 */
-                .min_level_of_detail = MIN2(psview->u.tex.first_level +
-                                            MAX2(psampler->min_lod, 0),
-                                            psview->u.tex.last_level),
-                .max_level_of_detail = MIN2(psview->u.tex.first_level +
-                                            MAX2(psampler->max_lod,
-                                                 psampler->min_lod),
-                                            psview->u.tex.last_level),
-
-                .texture_base_pointer = cl_address(rsc->bo,
-                                                   rsc->slices[0].offset),
-
-                .output_32_bit = return_size == 32,
-        };
-
-        /* Set up the sampler swizzle if we're doing 16-bit sampling.  For
-         * 32-bit, we leave swizzling up to the shader compiler.
-         *
-         * Note: Contrary to the docs, the swizzle still applies even if the
-         * return size is 32.  It's just that you probably want to swizzle in
-         * the shader, because you need the Y/Z/W channels to be defined.
-         */
-        if (return_size == 32) {
-                unpacked.swizzle_r = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_X);
-                unpacked.swizzle_g = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Y);
-                unpacked.swizzle_b = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Z);
-                unpacked.swizzle_a = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_W);
-        } else {
-                unpacked.swizzle_r = v3d_translate_pipe_swizzle(sview->swizzle[0]);
-                unpacked.swizzle_g = v3d_translate_pipe_swizzle(sview->swizzle[1]);
-                unpacked.swizzle_b = v3d_translate_pipe_swizzle(sview->swizzle[2]);
-                unpacked.swizzle_a = v3d_translate_pipe_swizzle(sview->swizzle[3]);
-        }
-
-        int min_img_filter = psampler->min_img_filter;
-        int min_mip_filter = psampler->min_mip_filter;
-        int mag_img_filter = psampler->mag_img_filter;
-
-        if (return_size == 32) {
-                min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
-                min_img_filter = PIPE_TEX_FILTER_NEAREST;
-                mag_img_filter = PIPE_TEX_FILTER_NEAREST;
-        }
-
-        bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;
-        switch (min_mip_filter) {
-        case PIPE_TEX_MIPFILTER_NONE:
-                unpacked.filter += min_nearest ? 2 : 0;
-                break;
-        case PIPE_TEX_MIPFILTER_NEAREST:
-                unpacked.filter += min_nearest ? 4 : 8;
-                break;
-        case PIPE_TEX_MIPFILTER_LINEAR:
-                unpacked.filter += min_nearest ? 4 : 8;
-                unpacked.filter += 2;
-                break;
-        }
-
-        if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)
-                unpacked.filter++;
-
-        if (psampler->max_anisotropy > 8)
-                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;
-        else if (psampler->max_anisotropy > 4)
-                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;
-        else if (psampler->max_anisotropy > 2)
-                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;
-        else if (psampler->max_anisotropy)
-                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;
-
-        uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
-        cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
-
-        for (int i = 0; i < ARRAY_SIZE(packed); i++)
-                packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
-
-        /* TMU indirect structs need to be 32b aligned. */
-        v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
-        cl_emit_prepacked(&job->indirect, &packed);
-}
-
-static void
-emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex)
-{
-        for (int i = 0; i < stage_tex->num_textures; i++) {
-                if (stage_tex->textures[i])
-                        emit_one_texture(v3d, stage_tex, i);
-        }
-}
-#endif /* V3D_VERSION < 40 */
-
 static uint32_t
 translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)
 {
@ -263,18 +97,12 @@ emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
 {
        struct pipe_rt_blend_state *rtblend = &blend->rt[rt];

-#if V3D_VERSION >= 40
        /* We don't need to emit blend state for disabled RTs. */
        if (!rtblend->blend_enable)
                return;
-#endif

        cl_emit(&job->bcl, BLEND_CFG, config) {
-#if V3D_VERSION >= 40
                config.render_target_mask = rt_mask;
-#else
-                assert(rt == 0);
-#endif

                config.color_blend_mode = rtblend->rgb_func;
                config.color_blend_dst_factor =
@ -311,7 +139,6 @@ emit_flat_shade_flags(struct v3d_job *job,
        }
 }

-#if V3D_VERSION >= 40
 static void
 emit_noperspective_flags(struct v3d_job *job,
                         int varying_offset,
@ -345,7 +172,6 @@ emit_centroid_flags(struct v3d_job *job,
                        higher;
        }
 }
-#endif /* V3D_VERSION >= 40 */

 static bool
 emit_varying_flags(struct v3d_job *job, uint32_t *flags,
@ -433,14 +259,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
                        if (maxx > minx && maxy > miny) {
                                clip.clip_window_width_in_pixels = maxx - minx;
                                clip.clip_window_height_in_pixels = maxy - miny;
-                        } else if (V3D_VERSION < 41) {
-                                /* The HW won't entirely clip out when scissor
-                                 * w/h is 0.  Just treat it the same as
-                                 * rasterizer discard.
-                                 */
-                                rasterizer_discard = true;
-                                clip.clip_window_width_in_pixels = 1;
-                                clip.clip_window_height_in_pixels = 1;
                        }
                }

@ -512,14 +330,14 @@ v3dX(emit_state)(struct pipe_context *pctx)
                        /* Note: EZ state may update based on the compiled FS,
                         * along with ZSA
                         */
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                        config.early_z_updates_enable =
                                (job->ez_state != V3D_EZ_DISABLED);
 #endif
                        if (v3d->zsa->base.depth_enabled) {
                                config.z_updates_enable =
                                        v3d->zsa->base.depth_writemask;
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                                config.early_z_enable =
                                        config.early_z_updates_enable;
 #endif
@ -559,7 +377,7 @@ v3dX(emit_state)(struct pipe_context *pctx)

        if (v3d->dirty & V3D_DIRTY_RASTERIZER &&
            v3d->rasterizer->base.offset_tri) {
-                if (v3d->screen->devinfo.ver <= 42 &&
+                if (v3d->screen->devinfo.ver == 42 &&
                    job->zsbuf &&
                    job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
                        cl_emit_prepacked_sized(&job->bcl,
@ -583,7 +401,7 @@ v3dX(emit_state)(struct pipe_context *pctx)
        }

        if (v3d->dirty & V3D_DIRTY_VIEWPORT) {
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
                        clip.viewport_half_width_in_1_256th_of_pixel =
                                v3d->viewport.scale[0] * 256.0f;
@ -617,12 +435,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
                }

                cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
-#if V3D_VERSION < 41
-                        vp.viewport_centre_x_coordinate =
-                                v3d->viewport.translate[0];
-                        vp.viewport_centre_y_coordinate =
-                                v3d->viewport.translate[1];
-#else
                        float vp_fine_x = v3d->viewport.translate[0];
                        float vp_fine_y = v3d->viewport.translate[1];
                        int32_t vp_coarse_x = 0;
@ -649,7 +461,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
                        vp.fine_y = vp_fine_y;
                        vp.coarse_x = vp_coarse_x;
                        vp.coarse_y = vp_coarse_y;
-#endif
                }
        }

@ -657,11 +468,9 @@ v3dX(emit_state)(struct pipe_context *pctx)
                struct v3d_blend_state *blend = v3d->blend;

                if (blend->blend_enables) {
-#if V3D_VERSION >= 40
                        cl_emit(&job->bcl, BLEND_ENABLES, enables) {
                                enables.mask = blend->blend_enables;
                        }
-#endif

                        const uint32_t max_rts =
                                V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver);
@ -716,8 +525,7 @@ v3dX(emit_state)(struct pipe_context *pctx)
        /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
         * color.
         */
-        if (v3d->dirty & V3D_DIRTY_BLEND_COLOR ||
-            (V3D_VERSION < 41 && (v3d->dirty & V3D_DIRTY_BLEND))) {
+        if (v3d->dirty & V3D_DIRTY_BLEND_COLOR) {
                cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
                        color.red_f16 = (v3d->swap_color_rb ?
                                          v3d->blend_color.hf[2] :
@ -751,20 +559,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
                }
        }

-#if V3D_VERSION < 40
-        /* Pre-4.x, we have texture state that depends on both the sampler and
-         * the view, so we merge them together at draw time.
-         */
-        if (v3d->dirty & V3D_DIRTY_FRAGTEX)
-                emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]);
-
-        if (v3d->dirty & V3D_DIRTY_GEOMTEX)
-                emit_textures(v3d, &v3d->tex[PIPE_SHADER_GEOMETRY]);
-
-        if (v3d->dirty & V3D_DIRTY_VERTTEX)
-                emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]);
-#endif
-
        if (v3d->dirty & V3D_DIRTY_FLAT_SHADE_FLAGS) {
                if (!emit_varying_flags(job,
                                        v3d->prog.fs->prog_data.fs->flat_shade_flags,
@ -773,7 +567,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
                }
        }

-#if V3D_VERSION >= 40
        if (v3d->dirty & V3D_DIRTY_NOPERSPECTIVE_FLAGS) {
                if (!emit_varying_flags(job,
                                        v3d->prog.fs->prog_data.fs->noperspective_flags,
@ -789,7 +582,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
                        cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
                }
        }
-#endif

        /* Set up the transform feedback data specs (which VPM entries to
         * output to which buffers).
@ -807,7 +599,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
                                              tf_shader->tf_specs_psiz :
                                              tf_shader->tf_specs);

-#if V3D_VERSION >= 40
                        bool tf_enabled = v3d_transform_feedback_enabled(v3d);
                        job->tf_enabled |= tf_enabled;

@ -816,23 +607,13 @@ v3dX(emit_state)(struct pipe_context *pctx)
                                        tf_shader->num_tf_specs;
                                tfe.enable = tf_enabled;
                        };
-#else /* V3D_VERSION < 40 */
-                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
-                                tfe.number_of_32_bit_output_buffer_address_following =
-                                        so->num_targets;
-                                tfe.number_of_16_bit_output_data_specs_following =
-                                        tf_shader->num_tf_specs;
-                        };
-#endif /* V3D_VERSION < 40 */
                        for (int i = 0; i < tf_shader->num_tf_specs; i++) {
                                cl_emit_prepacked(&job->bcl, &tf_specs[i]);
                        }
                } else {
-#if V3D_VERSION >= 40
                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
                                tfe.enable = false;
                        };
-#endif /* V3D_VERSION >= 40 */
                }
        }

@ -850,7 +631,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
                        uint32_t offset = target ?
                                v3d_stream_output_target(target)->offset * info->stride[i] * 4 : 0;

-#if V3D_VERSION >= 40
                        if (!target)
                                continue;

@ -863,16 +643,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
                                        (target->buffer_size - offset) >> 2;
                                output.buffer_number = i;
                        }
-#else /* V3D_VERSION < 40 */
-                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
-                                if (target) {
-                                        output.address =
-                                                cl_address(rsc->bo,
-                                                           target->buffer_offset +
-                                                           offset);
-                                }
-                        };
-#endif /* V3D_VERSION < 40 */
                        if (target) {
                                v3d_job_add_tf_write_resource(v3d->job,
                                                              target->buffer);
@ -889,7 +659,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
                }
        }

-#if V3D_VERSION >= 40
        if (v3d->dirty & V3D_DIRTY_SAMPLE_STATE) {
                cl_emit(&job->bcl, SAMPLE_STATE, state) {
                        /* Note: SampleCoverage was handled at the
@ -899,5 +668,4 @@ v3dX(emit_state)(struct pipe_context *pctx)
                        state.mask = job->msaa ? v3d->sample_mask : 0xf;
                }
        }
-#endif
 }
--- a/src/gallium/drivers/v3d/v3dx_format_table.c
+++ b/src/gallium/drivers/v3d/v3dx_format_table.c
@ -145,7 +145,6 @@ static const struct v3d_format format_table[] = {
        FORMAT(R11G11B10_FLOAT,   R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZ1, 16, 0),
        FORMAT(R9G9B9E5_FLOAT,    NO,           RGB9_E5,     SWIZ_XYZ1, 16, 0),

-#if V3D_VERSION >= 40
        FORMAT(S8_UINT_Z24_UNORM, D24S8,        DEPTH24_X8,  SWIZ_XXXX, 32, 1),
        FORMAT(X8Z24_UNORM,       D24S8,        DEPTH24_X8,  SWIZ_XXXX, 32, 1),
        FORMAT(S8X24_UINT,        S8,           RGBA8UI, SWIZ_XXXX, 16, 1),
@ -155,16 +154,6 @@ static const struct v3d_format format_table[] = {
        /* Pretend we support this, but it'll be separate Z32F depth and S8. */
        FORMAT(Z32_FLOAT_S8X24_UINT, D32F,      DEPTH_COMP32F, SWIZ_XXXX, 32, 1),
        FORMAT(X32_S8X24_UINT,    S8,           R8UI,          SWIZ_XXXX, 16, 1),
-#else
-        FORMAT(S8_UINT_Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1),
-        FORMAT(X8Z24_UNORM,       ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1),
-        FORMAT(S8X24_UINT,        NO,           R32F,        SWIZ_XXXX, 32, 1),
-        FORMAT(Z32_FLOAT,         ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1),
-        FORMAT(Z16_UNORM,         ZS_DEPTH_COMPONENT16,  DEPTH_COMP16, SWIZ_XXXX, 32, 1),
-
-        /* Pretend we support this, but it'll be separate Z32F depth and S8. */
-        FORMAT(Z32_FLOAT_S8X24_UINT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1),
-#endif

        FORMAT(ETC2_RGB8,         NO,           RGB8_ETC2,   SWIZ_XYZ1, 16, 0),
        FORMAT(ETC2_SRGB8,        NO,           RGB8_ETC2,   SWIZ_XYZ1, 16, 0),
@ -233,9 +222,6 @@ v3dX(get_internal_type_bpp_for_output_format)(uint32_t format,
 {
        switch (format) {
        case V3D_OUTPUT_IMAGE_FORMAT_RGBA8:
-#if V3D_VERSION < 41
-        case V3D_OUTPUT_IMAGE_FORMAT_RGBX8:
-#endif
        case V3D_OUTPUT_IMAGE_FORMAT_RGB8:
        case V3D_OUTPUT_IMAGE_FORMAT_RG8:
        case V3D_OUTPUT_IMAGE_FORMAT_R8:
@ -264,9 +250,6 @@ v3dX(get_internal_type_bpp_for_output_format)(uint32_t format,
        case V3D_OUTPUT_IMAGE_FORMAT_SRGB:
        case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2:
        case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F:
-#if V3D_VERSION < 41
-        case V3D_OUTPUT_IMAGE_FORMAT_SRGBX8:
-#endif
        case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F:
                /* Note that sRGB RTs are stored in the tile buffer at 16F,
                 * and the conversion to sRGB happens at tilebuffer
--- a/src/gallium/drivers/v3d/v3dx_job.c
+++ b/src/gallium/drivers/v3d/v3dx_job.c
@ -34,9 +34,7 @@ void v3dX(bcl_epilogue)(struct v3d_context *v3d, struct v3d_job *job)
 {
                v3d_cl_ensure_space_with_branch(&job->bcl,
                                                cl_packet_length(PRIMITIVE_COUNTS_FEEDBACK) +
-#if V3D_VERSION >= 41
                                                cl_packet_length(TRANSFORM_FEEDBACK_SPECS) +
-#endif
                                                cl_packet_length(FLUSH));

                if (job->tf_enabled || job->needs_primitives_generated) {
@ -57,13 +55,11 @@ void v3dX(bcl_epilogue)(struct v3d_context *v3d, struct v3d_job *job)
                 * cleans up and finishes before it gets reset by the next
                 * frame's tile binning mode cfg packet. (SWVC5-718).
                 */
-#if V3D_VERSION >= 41
                if (job->tf_enabled) {
                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
                                tfe.enable = false;
                        };
                }
-#endif /* V3D_VERSION >= 41 */

                /* We just FLUSH here to tell the HW to cap the bin CLs with a
                 * return.  Any remaining state changes won't be flushed to
--- a/src/gallium/drivers/v3d/v3dx_rcl.c
+++ b/src/gallium/drivers/v3d/v3dx_rcl.c
@ -36,23 +36,6 @@

 #define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1)

-/* The HW queues up the load until the tile coordinates show up, but can only
- * track one at a time.  If we need to do more than one load, then we need to
- * flush out the previous load by emitting the tile coordinates and doing a
- * dummy store.
- */
-static void
-flush_last_load(struct v3d_cl *cl)
-{
-        if (V3D_VERSION >= 40)
-                return;
-
-        cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-        cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
-                store.buffer_to_store = NONE;
-        }
-}
-
 static void
 load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
             int layer, uint32_t pipe_bit, uint32_t *loads_pending)
@ -73,7 +56,6 @@ load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
                load.buffer_to_load = buffer;
                load.address = cl_address(rsc->bo, layer_offset);

-#if V3D_VERSION >= 40
                load.memory_format = surf->tiling;
                if (separate_stencil)
                        load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
@ -96,20 +78,9 @@ load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
                else
                        load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;

-#else /* V3D_VERSION < 40 */
-                /* Can't do raw ZSTENCIL loads -- need to load/store them to
-                 * separate buffers for Z and stencil.
-                 */
-                assert(buffer != ZSTENCIL);
-                load.raw_mode = true;
-                load.padded_height_of_output_image_in_uif_blocks =
-                        surf->padded_height_of_output_image_in_uif_blocks;
-#endif /* V3D_VERSION < 40 */
        }

        *loads_pending &= ~pipe_bit;
-        if (*loads_pending)
-                flush_last_load(cl);
 }

 static void
@ -127,7 +98,6 @@ store_general(struct v3d_job *job,
        }

        *stores_pending &= ~pipe_bit;
-        bool last_store = !(*stores_pending);

        struct v3d_resource *rsc = v3d_resource(psurf->texture);

@ -140,7 +110,6 @@ store_general(struct v3d_job *job,
                store.buffer_to_store = buffer;
                store.address = cl_address(rsc->bo, layer_offset);

-#if V3D_VERSION >= 40
                store.clear_buffer_being_stored = false;

                if (separate_stencil)
@ -168,35 +137,6 @@ store_general(struct v3d_job *job,
                        store.decimate_mode = V3D_DECIMATE_MODE_4X;
                else
                        store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
-
-#else /* V3D_VERSION < 40 */
-                /* Can't do raw ZSTENCIL stores -- need to load/store them to
-                 * separate buffers for Z and stencil.
-                 */
-                assert(buffer != ZSTENCIL);
-                store.raw_mode = true;
-                if (!last_store) {
-                        store.disable_color_buffers_clear_on_write = true;
-                        store.disable_z_buffer_clear_on_write = true;
-                        store.disable_stencil_buffer_clear_on_write = true;
-                } else {
-                        store.disable_color_buffers_clear_on_write =
-                                !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) &&
-                                   general_color_clear &&
-                                   (job->clear & pipe_bit)));
-                        store.disable_z_buffer_clear_on_write =
-                                !(job->clear & PIPE_CLEAR_DEPTH);
-                        store.disable_stencil_buffer_clear_on_write =
-                                !(job->clear & PIPE_CLEAR_STENCIL);
-                }
-                store.padded_height_of_output_image_in_uif_blocks =
-                        surf->padded_height_of_output_image_in_uif_blocks;
-#endif /* V3D_VERSION < 40 */
-        }
-
-        /* There must be a TILE_COORDINATES_IMPLICIT between each store. */
-        if (V3D_VERSION < 40 && !last_store) {
-                cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
        }
 }

@ -223,7 +163,6 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer)
         */
        assert(!job->bbuf || job->load == 0);
        assert(!job->bbuf || job->nr_cbufs <= 1);
-        assert(!job->bbuf || V3D_VERSION >= 40);

        uint32_t loads_pending = job->bbuf ? job->store : job->load;

@ -235,18 +174,14 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer)
                struct pipe_surface *psurf = job->bbuf ? job->bbuf : job->cbufs[i];
                assert(!job->bbuf || i == 0);

-                if (!psurf || (V3D_VERSION < 40 &&
-                               psurf->texture->nr_samples <= 1)) {
+                if (!psurf)
                        continue;
-                }

                load_general(cl, psurf, RENDER_TARGET_0 + i, layer,
                             bit, &loads_pending);
        }

-        if ((loads_pending & PIPE_CLEAR_DEPTHSTENCIL) &&
-            (V3D_VERSION >= 40 ||
-             (job->zsbuf && job->zsbuf->texture->nr_samples > 1))) {
+        if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
                assert(!job->early_zs_clear);
                struct pipe_surface *src = job->bbuf ? job->bbuf : job->zsbuf;
                struct v3d_resource *rsc = v3d_resource(src->texture);
@ -268,57 +203,14 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer)
                }
        }

-#if V3D_VERSION < 40
-        /* The initial reload will be queued until we get the
-         * tile coordinates.
-         */
-        if (loads_pending) {
-                cl_emit(cl, RELOAD_TILE_COLOR_BUFFER, load) {
-                        load.disable_color_buffer_load =
-                                (~loads_pending &
-                                 PIPE_CLEAR_COLOR_BUFFERS) >>
-                                PIPE_FIRST_COLOR_BUFFER_BIT;
-                        load.enable_z_load =
-                                loads_pending & PIPE_CLEAR_DEPTH;
-                        load.enable_stencil_load =
-                                loads_pending & PIPE_CLEAR_STENCIL;
-                }
-        }
-#else /* V3D_VERSION >= 40 */
        assert(!loads_pending);
        cl_emit(cl, END_OF_LOADS, end);
-#endif
 }

 static void
 v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
 {
-#if V3D_VERSION < 40
-        UNUSED bool needs_color_clear = job->clear & PIPE_CLEAR_COLOR_BUFFERS;
-        UNUSED bool needs_z_clear = job->clear & PIPE_CLEAR_DEPTH;
-        UNUSED bool needs_s_clear = job->clear & PIPE_CLEAR_STENCIL;
-
-        /* For clearing color in a TLB general on V3D 3.3:
-         *
-         * - NONE buffer store clears all TLB color buffers.
-         * - color buffer store clears just the TLB color buffer being stored.
-         * - Z/S buffers store may not clear the TLB color buffer.
-         *
-         * And on V3D 4.1, we only have one flag for "clear the buffer being
-         * stored" in the general packet, and a separate packet to clear all
-         * color TLB buffers.
-         *
-         * As a result, we only bother flagging TLB color clears in a general
-         * packet when we don't have to emit a separate packet to clear all
-         * TLB color buffers.
-         */
-        bool general_color_clear = (needs_color_clear &&
-                                    (job->clear & PIPE_CLEAR_COLOR_BUFFERS) ==
-                                    (job->store & PIPE_CLEAR_COLOR_BUFFERS));
-#else
        bool general_color_clear = false;
-#endif
-
        uint32_t stores_pending = job->store;

        /* For V3D 4.1, use general stores for all TLB stores.
@ -337,17 +229,14 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
                        continue;

                struct pipe_surface *psurf = job->cbufs[i];
-                if (!psurf ||
-                    (V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) {
+                if (!psurf)
                        continue;
-                }

                store_general(job, cl, psurf, layer, RENDER_TARGET_0 + i, bit,
                              &stores_pending, general_color_clear, job->bbuf);
        }

-        if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf &&
-            !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) {
+        if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf) {
                assert(!job->early_zs_clear);
                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                if (rsc->separate_stencil) {
@ -375,35 +264,7 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
                }
        }

-#if V3D_VERSION < 40
-        if (stores_pending) {
-                cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {

-                        store.disable_color_buffer_write =
-                                (~stores_pending >>
-                                 PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf;
-                        store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH;
-                        store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL;
-
-                        /* Note that when set this will clear all of the color
-                         * buffers.
-                         */
-                        store.disable_color_buffers_clear_on_write =
-                                !needs_color_clear;
-                        store.disable_z_buffer_clear_on_write =
-                                !needs_z_clear;
-                        store.disable_stencil_buffer_clear_on_write =
-                                !needs_s_clear;
-                };
-        } else if (needs_color_clear && !general_color_clear) {
-                /* If we didn't do our color clears in the general packet,
-                 * then emit a packet to clear all the TLB color buffers now.
-                 */
-                cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
-                        store.buffer_to_store = NONE;
-                }
-        }
-#else /* V3D_VERSION >= 40 */
        /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments,
         * we still need to emit some sort of store.
         */
@ -421,7 +282,7 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
         * clearing Z/S.
         */
        if (job->clear) {
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
                        clear.clear_z_stencil_buffer = !job->early_zs_clear;
                        clear.clear_all_render_targets = true;
@ -432,7 +293,6 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
 #endif

        }
-#endif /* V3D_VERSION >= 40 */
 }

 static void
@ -445,22 +305,13 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
        v3d_cl_ensure_space(cl, 200, 1);
        struct v3d_cl_reloc tile_list_start = cl_get_address(cl);

-        if (V3D_VERSION >= 40) {
-                /* V3D 4.x only requires a single tile coordinates, and
-                 * END_OF_LOADS switches us between loading and rendering.
-                 */
-                cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-        }
+        /* V3D 4.x/7.x only requires a single tile coordinates packet, and
+         * END_OF_LOADS switches us between loading and rendering.
+         */
+        cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

        v3d_rcl_emit_loads(job, cl, layer);

-        if (V3D_VERSION < 40) {
-                /* Tile Coordinates triggers the last reload and sets where
-                 * the stores go. There must be one per store packet.
-                 */
-                cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-        }
-
        /* The binner starts out writing tiles assuming that the initial mode
         * is triangles, so make sure that's the case.
         */
@ -468,20 +319,16 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
                fmt.primitive_type = LIST_TRIANGLES;
        }

-#if V3D_VERSION >= 41
        /* PTB assumes that value to be 0, but hw will not set it. */
        cl_emit(cl, SET_INSTANCEID, set) {
           set.instance_id = 0;
        }
-#endif

        cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

        v3d_rcl_emit_stores(job, cl, layer);

-#if V3D_VERSION >= 40
        cl_emit(cl, END_OF_TILE_MARKER, end);
-#endif

        cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

@ -491,7 +338,6 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
        }
 }

-#if V3D_VERSION > 33
 /* Note that for v71, render target cfg packets has just one field that
 * combined the internal type and clamp mode. For simplicity we keep just one
 * helper.
@ -503,13 +349,11 @@ static uint32_t
 v3dX(clamp_for_format_and_type)(uint32_t rt_type,
                                enum pipe_format format)
 {
-#if V3D_VERSION >= 40 && V3D_VERSION <= 42
+#if V3D_VERSION == 42
        if (util_format_is_srgb(format)) {
                return V3D_RENDER_TARGET_CLAMP_NORM;
-#if V3D_VERSION >= 42
        } else if (util_format_is_pure_integer(format)) {
                return V3D_RENDER_TARGET_CLAMP_INT;
-#endif
        } else {
                return V3D_RENDER_TARGET_CLAMP_NONE;
        }
@ -541,9 +385,8 @@ v3dX(clamp_for_format_and_type)(uint32_t rt_type,
        }
        return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID;
 #endif
-        return 0;
+        unreachable("Wrong V3D_VERSION");
 }
-#endif

 #if V3D_VERSION >= 71
 static void
@ -566,7 +409,7 @@ v3d_setup_render_target(struct v3d_job *job,
 }
 #endif

-#if V3D_VERSION >= 40 && V3D_VERSION <= 42
+#if V3D_VERSION == 42
 static void
 v3d_setup_render_target(struct v3d_job *job,
                        int cbuf,
@ -589,36 +432,6 @@ v3d_setup_render_target(struct v3d_job *job,
 }
 #endif

-#if V3D_VERSION < 40
-static void
-v3d_emit_z_stencil_config(struct v3d_job *job, struct v3d_surface *surf,
-                          struct v3d_resource *rsc, bool is_separate_stencil)
-{
-        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_Z_STENCIL, zs) {
-                zs.address = cl_address(rsc->bo, surf->offset);
-
-                if (!is_separate_stencil) {
-                        zs.internal_type = surf->internal_type;
-                        zs.output_image_format = surf->format;
-                } else {
-                        zs.z_stencil_id = 1; /* Separate stencil */
-                }
-
-                zs.padded_height_of_output_image_in_uif_blocks =
-                        surf->padded_height_of_output_image_in_uif_blocks;
-
-                assert(surf->tiling != V3D_TILING_RASTER);
-                zs.memory_format = surf->tiling;
-        }
-
-        if (job->store & (is_separate_stencil ?
-                          PIPE_CLEAR_STENCIL :
-                          PIPE_CLEAR_DEPTHSTENCIL)) {
-                rsc->writes++;
-        }
-}
-#endif /* V3D_VERSION < 40 */
-
 static bool
 supertile_in_job_scissors(struct v3d_job *job,
                          uint32_t x, uint32_t y, uint32_t w, uint32_t h)
@ -648,7 +461,6 @@ supertile_in_job_scissors(struct v3d_job *job,
   return false;
 }

-#if V3D_VERSION >= 40
 static inline bool
 do_double_initial_tile_clear(const struct v3d_job *job)
 {
@ -663,7 +475,6 @@ do_double_initial_tile_clear(const struct v3d_job *job)
        return job->double_buffer &&
               (job->draw_tiles_x > 1 || job->draw_tiles_y > 1);
 }
-#endif

 static void
 emit_render_layer(struct v3d_job *job, uint32_t layer)
@ -730,12 +541,6 @@ emit_render_layer(struct v3d_job *job, uint32_t layer)
         * state, we need 1 dummy store in between internal type/size
         * changes on V3D 3.x, and 2 dummy stores on 4.x.
         */
-#if V3D_VERSION < 40
-        cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
-                store.buffer_to_store = NONE;
-        }
-#endif
-#if V3D_VERSION >= 40
        for (int i = 0; i < 2; i++) {
                if (i > 0)
                        cl_emit(&job->rcl, TILE_COORDINATES, coords);
@ -756,7 +561,6 @@ emit_render_layer(struct v3d_job *job, uint32_t layer)
                }
                cl_emit(&job->rcl, END_OF_TILE_MARKER, end);
        }
-#endif
        cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);

        v3d_rcl_emit_generic_per_tile_list(job, layer);
@ -808,15 +612,10 @@ v3dX(emit_rcl)(struct v3d_job *job)
         * optional updates to the previous HW state.
         */
        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
-#if V3D_VERSION < 40
-                config.enable_z_store = job->store & PIPE_CLEAR_DEPTH;
-                config.enable_stencil_store = job->store & PIPE_CLEAR_STENCIL;
-#else /* V3D_VERSION >= 40 */
                if (job->zsbuf) {
                        struct v3d_surface *surf = v3d_surface(job->zsbuf);
                        config.internal_depth_type = surf->internal_type;
                }
-#endif /* V3D_VERSION >= 40 */

                if (job->decided_global_ez_enable) {
                        switch (job->first_ez_state) {
@ -839,7 +638,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
                        config.early_z_disable = true;
                }

-#if V3D_VERSION >= 40
                assert(job->zsbuf || config.early_z_disable);

                job->early_zs_clear = (job->clear & PIPE_CLEAR_DEPTHSTENCIL) &&
@ -847,7 +645,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
                        !(job->store & PIPE_CLEAR_DEPTHSTENCIL);

                config.early_depth_stencil_clear = job->early_zs_clear;
-#endif /* V3D_VERSION >= 40 */

                config.image_width_pixels = job->draw_width;
                config.image_height_pixels = job->draw_height;
@ -858,7 +655,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
                config.multisample_mode_4x = job->msaa;
                config.double_buffer_in_non_ms_mode = job->double_buffer;

-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
 #endif
 #if V3D_VERSION >= 71
@ -921,22 +718,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
                        }
                }

-#if V3D_VERSION < 40
-                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
-                        rt.address = cl_address(rsc->bo, surf->offset);
-                        rt.internal_type = surf->internal_type;
-                        rt.output_image_format = surf->format;
-                        rt.memory_format = surf->tiling;
-                        rt.internal_bpp = surf->internal_bpp;
-                        rt.render_target_number = i;
-                        rt.pad = config_pad;
-
-                        if (job->store & PIPE_CLEAR_COLOR0 << i)
-                                rsc->writes++;
-                }
-#endif /* V3D_VERSION < 40 */
-
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1,
                        clear) {
                        clear.clear_color_low_32_bits = job->clear_color[i][0];
@ -1000,7 +782,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
 #endif
        }

-#if V3D_VERSION >= 40 && V3D_VERSION <= 42
+#if V3D_VERSION == 42
        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
                v3d_setup_render_target(job, 0,
                                        &rt.render_target_0_internal_bpp,
@ -1021,27 +803,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
        }
 #endif

-#if V3D_VERSION < 40
-        /* FIXME: Don't bother emitting if we don't load/clear Z/S. */
-        if (job->zsbuf) {
-                struct pipe_surface *psurf = job->zsbuf;
-                struct v3d_surface *surf = v3d_surface(psurf);
-                struct v3d_resource *rsc = v3d_resource(psurf->texture);
-
-                v3d_emit_z_stencil_config(job, surf, rsc, false);
-
-                /* Emit the separate stencil packet if we have a resource for
-                 * it.  The HW will only load/store this buffer if the
-                 * Z/Stencil config doesn't have stencil in its format.
-                 */
-                if (surf->separate_stencil) {
-                        v3d_emit_z_stencil_config(job,
-                                                  v3d_surface(surf->separate_stencil),
-                                                  rsc->separate_stencil, true);
-                }
-        }
-#endif /* V3D_VERSION < 40 */
-
        /* Ends rendering mode config. */
        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES,
                clear) {
--- a/src/gallium/drivers/v3d/v3dx_state.c
+++ b/src/gallium/drivers/v3d/v3dx_state.c
@ -106,21 +106,17 @@ v3d_create_rasterizer_state(struct pipe_context *pctx,
        v3dx_pack(&so->depth_offset, DEPTH_OFFSET, depth) {
                depth.depth_offset_factor = cso->offset_scale;
                depth.depth_offset_units = cso->offset_units;
-#if V3D_VERSION >= 41
                depth.limit = cso->offset_clamp;
-#endif
        }

        /* V3d 4.x treats polygon offset units based on a Z24 buffer, so we
         * need to scale up offset_units if we're only Z16.
         */
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
        v3dx_pack(&so->depth_offset_z16, DEPTH_OFFSET, depth) {
                depth.depth_offset_factor = cso->offset_scale;
                depth.depth_offset_units = cso->offset_units * 256.0;
-#if V3D_VERSION >= 41
                depth.limit = cso->offset_clamp;
-#endif
        }
 #endif

@ -144,10 +140,6 @@ v3d_create_blend_state(struct pipe_context *pctx,
        if (cso->independent_blend_enable) {
                for (int i = 0; i < max_rts; i++) {
                        so->blend_enables |= cso->rt[i].blend_enable << i;
-
-                        /* V3D 4.x is when we got independent blend enables. */
-                        assert(V3D_VERSION >= 40 ||
-                               cso->rt[i].blend_enable == cso->rt[0].blend_enable);
                }
        } else {
                if (cso->rt[0].blend_enable)
@ -343,7 +335,7 @@ v3d_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
 static bool
 needs_default_attribute_values(void)
 {
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
        /* FIXME: on vulkan we are able to refine even further, as we know in
         * advance when we create the pipeline if we have an integer vertex
         * attrib. Pending to check if we could do something similar here.
@ -517,18 +509,10 @@ v3d_set_framebuffer_state(struct pipe_context *pctx,
                struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i];
                if (!cbuf)
                        continue;
-                struct v3d_surface *v3d_cbuf = v3d_surface(cbuf);

                const struct util_format_description *desc =
                        util_format_description(cbuf->format);

-                /* For BGRA8 formats (DRI window system default format), we
-                 * need to swap R and B, since the HW's format is RGBA8.  On
-                 * V3D 4.1+, the RCL can swap R and B on load/store.
-                 */
-                if (v3d->screen->devinfo.ver < 41 && v3d_cbuf->swap_rb)
-                        v3d->swap_color_rb |= 1 << i;
-
                if (desc->swizzle[3] == PIPE_SWIZZLE_1)
                        v3d->blend_dst_alpha_one |= 1 << i;
        }
@ -555,7 +539,6 @@ translate_wrap(uint32_t pipe_wrap)
        }
 }

-#if V3D_VERSION >= 40
 static void
 v3d_upload_sampler_state_variant(void *map,
                                 const struct pipe_sampler_state *cso,
@ -720,7 +703,7 @@ v3d_upload_sampler_state_variant(void *map,
                                break;
                        }

-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                        /* The TMU in V3D 7.x always takes 32-bit floats and handles conversions
                         * for us. In V3D 4.x we need to manually convert floating point color
                         * values to the expected format.
@ -739,7 +722,6 @@ v3d_upload_sampler_state_variant(void *map,
                }
        }
 }
-#endif

 static void *
 v3d_create_sampler_state(struct pipe_context *pctx,
@ -757,7 +739,6 @@ v3d_create_sampler_state(struct pipe_context *pctx,
        enum V3DX(Wrap_Mode) wrap_t = translate_wrap(cso->wrap_t);
        enum V3DX(Wrap_Mode) wrap_r = translate_wrap(cso->wrap_r);

-#if V3D_VERSION >= 40
        bool uses_border_color = (wrap_s == V3D_WRAP_MODE_BORDER ||
                                  wrap_t == V3D_WRAP_MODE_BORDER ||
                                  wrap_r == V3D_WRAP_MODE_BORDER);
@ -807,20 +788,6 @@ v3d_create_sampler_state(struct pipe_context *pctx,
                                                 so->border_color_variants ? i : border_variant);
        }

-#else /* V3D_VERSION < 40 */
-        v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) {
-                p0.s_wrap_mode = wrap_s;
-                p0.t_wrap_mode = wrap_t;
-                p0.r_wrap_mode = wrap_r;
-        }
-
-        v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
-                tex.depth_compare_function = cso->compare_mode ?
-                                             cso->compare_func :
-                                             V3D_COMPARE_FUNC_NEVER;
-                tex.fixed_bias = cso->lod_bias;
-        }
-#endif /* V3D_VERSION < 40 */
        return so;
 }

@ -911,8 +878,7 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
        tex->image_width = prsc->width0 * msaa_scale;
        tex->image_height = prsc->height0 * msaa_scale;

-#if V3D_VERSION >= 40
-        /* On 4.x, the height of a 1D texture is redefined to be the
+        /* On 4.x, the height of a 1D texture is redefined to be the
         * upper 14 bits of the width (which is only usable with txf).
         */
        if (prsc->target == PIPE_TEXTURE_1D ||
@ -922,7 +888,6 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,

        tex->image_width &= (1 << 14) - 1;
        tex->image_height &= (1 << 14) - 1;
-#endif

        if (prsc->target == PIPE_TEXTURE_3D) {
                tex->image_depth = prsc->depth0;
@ -941,7 +906,6 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,

        tex->base_level = base_level;

-#if V3D_VERSION >= 40
        tex->max_level = last_level;
        /* Note that we don't have a job to reference the texture's sBO
         * at state create time, so any time this sampler view is used
@ -951,8 +915,6 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
                v3d_layer_offset(prsc, 0, first_layer);

        tex->texture_base_pointer = cl_address(NULL, base_offset);
-#endif
-
        tex->array_stride_64_byte_aligned = rsc->cube_map_stride / 64;

 #if V3D_VERSION >= 71
@ -976,12 +938,10 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
        if (tex->level_0_is_strictly_uif)
                tex->level_0_ub_pad = rsc->slices[0].ub_pad;

-#if V3D_VERSION >= 40
        if (tex->uif_xor_disable ||
            tex->level_0_is_strictly_uif) {
                tex->extended = true;
        }
-#endif /* V3D_VERSION >= 40 */
 }

 void
@ -997,16 +957,10 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d,

        assert(so->serial_id != rsc->serial_id);

-#if V3D_VERSION >= 40
        v3d_bo_unreference(&so->bo);
        so->bo = v3d_bo_alloc(v3d->screen,
                              cl_packet_length(TEXTURE_SHADER_STATE), "sampler");
        map = v3d_bo_map(so->bo);
-#else /* V3D_VERSION < 40 */
-        STATIC_ASSERT(sizeof(so->texture_shader_state) >=
-                      cl_packet_length(TEXTURE_SHADER_STATE));
-        map = &so->texture_shader_state;
-#endif

        v3dx_pack(map, TEXTURE_SHADER_STATE, tex) {
                if (prsc->target != PIPE_BUFFER) {
@ -1025,69 +979,20 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d,
                }

                bool is_srgb = util_format_is_srgb(cso->format);
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                tex.srgb = is_srgb;
 #endif
 #if V3D_VERSION >= 71
                tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE;
 #endif

-#if V3D_VERSION >= 40
                tex.swizzle_r = v3d_translate_pipe_swizzle(so->swizzle[0]);
                tex.swizzle_g = v3d_translate_pipe_swizzle(so->swizzle[1]);
                tex.swizzle_b = v3d_translate_pipe_swizzle(so->swizzle[2]);
                tex.swizzle_a = v3d_translate_pipe_swizzle(so->swizzle[3]);
-#endif

-                if (prsc->nr_samples > 1 && V3D_VERSION < 40) {
-                        /* Using texture views to reinterpret formats on our
-                         * MSAA textures won't work, because we don't lay out
-                         * the bits in memory as it's expected -- for example,
-                         * RGBA8 and RGB10_A2 are compatible in the
-                         * ARB_texture_view spec, but in HW we lay them out as
-                         * 32bpp RGBA8 and 64bpp RGBA16F.  Just assert for now
-                         * to catch failures.
-                         *
-                         * We explicitly allow remapping S8Z24 to RGBA8888 for
-                         * v3d_blit.c's stencil blits.
-                         */
-                        assert((util_format_linear(cso->format) ==
-                                util_format_linear(prsc->format)) ||
-                               (prsc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM &&
-                                cso->format == PIPE_FORMAT_R8G8B8A8_UNORM));
-                        uint32_t output_image_format =
-                                v3d_get_rt_format(&screen->devinfo, cso->format);
-                        uint32_t internal_type;
-                        uint32_t internal_bpp;
-                        v3dX(get_internal_type_bpp_for_output_format)(output_image_format,
-                                                                      &internal_type,
-                                                                      &internal_bpp);
-
-                        switch (internal_type) {
-                        case V3D_INTERNAL_TYPE_8:
-                                tex.texture_type = TEXTURE_DATA_FORMAT_RGBA8;
-                                break;
-                        case V3D_INTERNAL_TYPE_16F:
-                                tex.texture_type = TEXTURE_DATA_FORMAT_RGBA16F;
-                                break;
-                        default:
-                                unreachable("Bad MSAA texture type");
-                        }
-
-                        /* sRGB was stored in the tile buffer as linear and
-                         * would have been encoded to sRGB on resolved tile
-                         * buffer store.  Note that this means we would need
-                         * shader code if we wanted to read an MSAA sRGB
-                         * texture without sRGB decode.
-                         */
-#if V3D_VERSION <= 42
-                        tex.srgb = false;
-#endif
-
-                } else {
-                        tex.texture_type = v3d_get_tex_format(&screen->devinfo,
-                                                              cso->format);
-                }
+                tex.texture_type = v3d_get_tex_format(&screen->devinfo,
+                                                      cso->format);
        };

        so->serial_id = rsc->serial_id;
@ -1141,7 +1046,6 @@ v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
        if (sample_format == PIPE_FORMAT_S8_UINT_Z24_UNORM)
                sample_format = PIPE_FORMAT_X8Z24_UNORM;

-#if V3D_VERSION >= 40
        const struct util_format_description *desc =
                util_format_description(sample_format);

@ -1202,7 +1106,6 @@ v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                                                V3D_SAMPLER_STATE_F16);
                }
        }
-#endif

        /* V3D still doesn't support sampling from raster textures, so we will
         * have to copy to a temporary tiled texture.
@ -1433,7 +1336,6 @@ v3d_create_image_view_texture_shader_state(struct v3d_context *v3d,
                                           struct v3d_shaderimg_stateobj *so,
                                           int img)
 {
-#if V3D_VERSION >= 40
        struct v3d_image_view *iview = &so->si[img];

        void *map;
@ -1469,12 +1371,6 @@ v3d_create_image_view_texture_shader_state(struct v3d_context *v3d,
                tex.texture_type = v3d_get_tex_format(&v3d->screen->devinfo,
                                                      iview->base.format);
        };
-#else /* V3D_VERSION < 40 */
-        /* V3D 3.x doesn't use support shader image load/store operations on
-         * textures, so it would get lowered in the shader to general memory
-         * accesses.
-         */
-#endif
 }

 static void
--- a/src/gallium/drivers/v3d/v3dx_tfu.c
+++ b/src/gallium/drivers/v3d/v3dx_tfu.c
@ -118,7 +118,7 @@ v3dX(tfu)(struct pipe_context *pctx,
                break;
       }

-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
        if (src_base_slice->tiling == V3D_TILING_RASTER) {
                tfu.icfg |= (V3D33_TFU_ICFG_FORMAT_RASTER <<
                             V3D33_TFU_ICFG_FORMAT_SHIFT);
@ -152,7 +152,7 @@ v3dX(tfu)(struct pipe_context *pctx,
                               implicit_padded_height) / uif_block_h) <<
                             V3D33_TFU_ICFG_OPAD_SHIFT);
        }
-#endif /* V3D_VERSION <= 42 */
+#endif /* V3D_VERSION == 42 */

 #if V3D_VERSION >= 71
        if (src_base_slice->tiling == V3D_TILING_RASTER) {